00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #ifndef _TFIDFRETMETHOD_HPP
00015 #define _TFIDFRETMETHOD_HPP
00016
00017 #include "TextQueryRetMethod.hpp"
00018
/// Parameter types and default values used by the TF-IDF retrieval method.
namespace TFIDFParameter {

  /// Term-frequency weighting schemes selectable through WeightParam::tf.
  enum TFMethod {RAWTF=0, LOGTF=1, BM25=2};

  /// TF weighting configuration for either the query side or the document side.
  struct WeightParam {
    TFMethod tf;    ///< which TF weighting scheme to apply
    double bm25K1;  ///< BM25 k1 parameter (presumably only consulted when tf == BM25 -- confirm in the implementation)
    double bm25B;   ///< BM25 b parameter (same caveat as bm25K1)
  };

  /// Feedback configuration.
  struct FeedbackParam {
    int howManyTerms;  ///< number of terms involved in feedback (exact use is in the implementation file)
    double posCoeff;   ///< coefficient for the positive feedback component (exact use is in the implementation file)
  };

  // Default values. These are declared const: a mutable `static` in a header
  // gives every translation unit its own private, writable copy, so an
  // assignment in one file would silently not be seen anywhere else.
  static const double defaultDocK1 = 1;
  static const double defaultDocB = 0.5;
  static const double defaultQryK1 = 1;
  static const double defaultQryB = 0;
  static const int defaultHowManyTerms = 50;
  static const double defaultPosCoeff = 0.5;
}
00039
00041 class TFIDFQueryRep : public ArrayQueryRep {
00042 public:
00043 TFIDFQueryRep(const TermQuery &qry, const Index &dbIndex, double *idfValue, TFIDFParameter::WeightParam ¶m);
00044
00045 virtual ~TFIDFQueryRep() {}
00046
00047 double queryTFWeight(const double rawTF) const;
00048 protected:
00049 TFIDFParameter::WeightParam &prm;
00050 double *idf;
00051 const Index &ind;
00052 };
00053
00055 class TFIDFDocRep : public DocumentRep {
00056 public:
00057 TFIDFDocRep(DOCID_T docID, const Index &dbIndex, double *idfValue,
00058 TFIDFParameter::WeightParam ¶m) :
00059 DocumentRep(docID), ind(dbIndex), prm(param), idf(idfValue) {
00060 }
00061 virtual ~TFIDFDocRep() { }
00062 virtual double termWeight(TERMID_T termID, const DocInfo *info) const{
00063 return (idf[termID]*docTFWeight(info->termCount()));
00064 }
00065 virtual double scoreConstant() const { return 0;}
00066
00067 double docTFWeight(const double rawTF) const;
00068 private:
00069
00070 const Index & ind;
00071 TFIDFParameter::WeightParam &prm;
00072 double *idf;
00073 };
00074
00075
00077
00078 class TFIDFRetMethod : public TextQueryRetMethod {
00079 public:
00080
00081 TFIDFRetMethod(const Index &dbIndex, ScoreAccumulator &accumulator);
00082 virtual ~TFIDFRetMethod() {delete [] idfV; delete scFunc;}
00083
00084 virtual TextQueryRep *computeTextQueryRep(const TermQuery &qry) {
00085 return (new TFIDFQueryRep(qry, ind, idfV, qryTFParam));
00086 }
00087
00088 virtual DocumentRep *computeDocRep(DOCID_T docID) {
00089 return (new TFIDFDocRep(docID, ind, idfV, docTFParam));
00090 }
00091 virtual ScoreFunction *scoreFunc() {
00092 return (scFunc);
00093 }
00094
00095
00096 virtual void updateTextQuery(TextQueryRep &qryRep, const DocIDSet &relDocs);
00097
00098 void setDocTFParam(TFIDFParameter::WeightParam &docTFWeightParam);
00099
00100 void setQueryTFParam(TFIDFParameter::WeightParam &queryTFWeightParam);
00101
00102 void setFeedbackParam(TFIDFParameter::FeedbackParam &feedbackParam);
00103
00104 static double BM25TF(const double rawTF, const double k1, const double b,
00105 const double docLen, const double avgDocLen);
00106
00107 protected:
00108 double *idfV;
00109 ScoreFunction *scFunc;
00110
00112
00113
00114 TFIDFParameter::WeightParam qryTFParam;
00115 TFIDFParameter::WeightParam docTFParam;
00116 TFIDFParameter::FeedbackParam fbParam;
00117
00119
00120 };
00121
00122
00123 inline void TFIDFRetMethod::setDocTFParam(TFIDFParameter::WeightParam &docTFWeightParam)
00124 {
00125 docTFParam = docTFWeightParam;
00126 }
00127
00128
00129
00130 inline void TFIDFRetMethod::setQueryTFParam(TFIDFParameter::WeightParam &queryTFWeightParam)
00131 {
00132 qryTFParam = queryTFWeightParam;
00133 }
00134
00135
00136 inline void TFIDFRetMethod::setFeedbackParam(TFIDFParameter::FeedbackParam &feedbackParam)
00137 {
00138 fbParam = feedbackParam;
00139 }
00140
00141
00142
00143 inline double TFIDFRetMethod ::BM25TF(const double rawTF, const double k1, const double b,
00144 const double docLen, const double avgDocLen)
00145 {
00146 double x= rawTF+k1*(1-b+b*docLen/avgDocLen);
00147 return (k1*rawTF/x);
00148 }
00149
00150
00151
00152 #endif
00153
00154
00155
00156
00157
00158
00159