Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

StructQryDocRep.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #ifndef _STRUCTQRYDOCREP_HPP
00013 #define _STRUCTQRYDOCREP_HPP
00014 #include "DocumentRep.hpp"
00015 #include <cmath>
00016 
00019 class StructQryDocRep : public DocumentRep {
00020 public:
00022   StructQryDocRep(DOCID_T docID, double *idfValue, int docLength, int docCount,
00023                   double docLengthAverage, double db) : 
00024     DocumentRep(docID), did(docID), idf(idfValue), end(docLength),
00025     docEnd(docLength), size(docLength), start(0),
00026     dla(docLengthAverage), defaultBelief(db) {
00027     oneMinusDB = 1 - defaultBelief;
00028     denom = log(docCount + 1.0);
00029     numer = docCount + 0.5;
00030   }
00032   virtual ~StructQryDocRep() {}
00034   virtual double termWeight(TERMID_T termID, const DocInfo *info) const { return 0;}
00036   virtual double termWeight(TERMID_T termID, double dtf, int df) const{
00037     if (idf)
00038       return beliefScore(dtf, idf[termID]);
00039     else
00040       return beliefScore(dtf, computeIdfScore(df));
00041   }
00043   virtual double scoreConstant() const { return 0;}
00044 
00046   void startPassageIteration(int sz) const {
00047     size = sz;
00048     increment = size/2;
00049     start = 0;
00050     end = size < docEnd ? size : docEnd;
00051   }
00053   bool hasMorePassage() const {
00054     // still some terms in the list.
00055     return(start < docEnd);
00056   }
00057 
00059   void nextPassage() const{
00060     if(start + increment < docEnd)
00061       start += increment;
00062     else
00063       start = docEnd;
00064     end = (start + size) < docEnd ? (start + size) : docEnd;
00065   }
00066 
00069   double computeIdfScore(double df) const {
00070     return log(numer/df)/denom;
00071   }
00072 
00074   double beliefScore(double df, double idf) const {
00075     return (defaultBelief + oneMinusDB
00076             * (df / (df + 0.5 + 1.5* ((end - start)/dla))) * idf);
00077   }
00078 
00080   DOCID_T did;
00082   mutable int start; 
00084   mutable int end;
00085 
00086 private:
00088   double *idf;
00090   mutable int size; 
00092   mutable int increment; 
00094   int docEnd;  
00096   double dla;
00098   double numer, denom;
00100   double defaultBelief, oneMinusDB;
00101 };
00102 #endif

Generated on Wed Nov 3 12:59:04 2004 for Lemur Toolkit by doxygen 1.2.18