00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _STRUCTQRYDOCREP_HPP
00013 #define _STRUCTQRYDOCREP_HPP
00014 #include "DocumentRep.hpp"
00015 #include <cmath>
00016
00019 class StructQryDocRep : public DocumentRep {
00020 public:
00022 StructQryDocRep(DOCID_T docID, double *idfValue, int docLength, int docCount,
00023 double docLengthAverage, double db) :
00024 DocumentRep(docID), did(docID), idf(idfValue), end(docLength),
00025 docEnd(docLength), size(docLength), start(0),
00026 dla(docLengthAverage), defaultBelief(db) {
00027 oneMinusDB = 1 - defaultBelief;
00028 denom = log(docCount + 1.0);
00029 numer = docCount + 0.5;
00030 }
00032 virtual ~StructQryDocRep() {}
00034 virtual double termWeight(TERMID_T termID, const DocInfo *info) const { return 0;}
00036 virtual double termWeight(TERMID_T termID, double dtf, int df) const{
00037 if (idf)
00038 return beliefScore(dtf, idf[termID]);
00039 else
00040 return beliefScore(dtf, computeIdfScore(df));
00041 }
00043 virtual double scoreConstant() const { return 0;}
00044
00046 void startPassageIteration(int sz) const {
00047 size = sz;
00048 increment = size/2;
00049 start = 0;
00050 end = size < docEnd ? size : docEnd;
00051 }
00053 bool hasMorePassage() const {
00054
00055 return(start < docEnd);
00056 }
00057
00059 void nextPassage() const{
00060 if(start + increment < docEnd)
00061 start += increment;
00062 else
00063 start = docEnd;
00064 end = (start + size) < docEnd ? (start + size) : docEnd;
00065 }
00066
00069 double computeIdfScore(double df) const {
00070 return log(numer/df)/denom;
00071 }
00072
00074 double beliefScore(double df, double idf) const {
00075 return (defaultBelief + oneMinusDB
00076 * (df / (df + 0.5 + 1.5* ((end - start)/dla))) * idf);
00077 }
00078
00080 DOCID_T did;
00082 mutable int start;
00084 mutable int end;
00085
00086 private:
00088 double *idf;
00090 mutable int size;
00092 mutable int increment;
00094 int docEnd;
00096 double dla;
00098 double numer, denom;
00100 double defaultBelief, oneMinusDB;
00101 };
00102 #endif