00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef _OKAPIRETMETHOD_HPP
00014 #define _OKAPIRETMETHOD_HPP
00015
00016 #include "TextQueryRep.hpp"
00017 #include "ScoreFunction.hpp"
00018 #include "TextQueryRetMethod.hpp"
00019 #include <cmath>
00020
00022
/// Parameter structures and default values for the Okapi retrieval method.
namespace OkapiParameter {

  /// Term-frequency weighting parameters.
  /// k1 and b are read by OkapiDocRep::BM25TF; k3 is handed to
  /// OkapiQueryRep / OkapiQueryTerm for query-term weighting.
  struct TFParam {
    double k1;
    double b;
    double k3;
  };

  // Defaults are const: a header-level mutable static gives every translation
  // unit its own private copy, so writing to one could never have had a
  // program-wide effect.
  static const double defaultK1 = 1.2;
  static const double defaultB = 0.75;
  static const double defaultK3 = 7;

  /// Feedback (query expansion) parameters.
  struct FeedbackParam {
    /// Expanded query term TF.
    double expQTF;
    /// Number of terms to add for feedback.
    /// NOTE(review): this field was missing from the extracted listing (the
    /// corresponding source line was blank) but defaultHowManyTerms below has
    /// no other home; restored here -- confirm against the original header.
    int howManyTerms;
  };

  static const double defaultExpQTF = 0.5;
  static const int defaultHowManyTerms = 50;
}
00044
00046
00047 class OkapiQueryTerm : public QueryTerm {
00048 public:
00049 OkapiQueryTerm(TERMID_T termID, double count, int pEstCount, double paramK3) : QueryTerm(termID, count), pEst(pEstCount), k3(paramK3) {
00050 }
00052 virtual int pEstCount() const { return pEst;}
00053
00055 virtual double weight() const {
00056 return ((k3+1)*w/(k3+w));
00057 }
00058 private:
00059 int pEst;
00060 double k3;
00061 };
00062
00064
00065 class OkapiScoreFunc : public ScoreFunction {
00066 public:
00067 OkapiScoreFunc(const Index &dbIndex): ind(dbIndex) {}
00068 virtual double matchedTermWeight(const QueryTerm *qTerm, const TextQueryRep *qRep, const DocInfo *info, const DocumentRep *dRep) const;
00069 protected:
00070 const Index &ind;
00071 };
00072
00073
00075 class OkapiQueryRep : public ArrayQueryRep {
00076 public:
00077
00078 OkapiQueryRep(const TermQuery &qry, const Index &dbIndex, double paramK3);
00079
00080 virtual ~OkapiQueryRep() { delete [] pEst; }
00082 int pNormCount() const { return pNorm;}
00084 void setPNormCount(int count) { pNorm = count;}
00086 void incPEst(int wdIndex, int val) { pEst[wdIndex]+=val;}
00087 protected:
00088 virtual QueryTerm *makeQueryTerm(TERMID_T wdIndex, double wdCount) const{
00089 return (new OkapiQueryTerm(wdIndex, wdCount, pEst[wdIndex], k3));
00090 }
00091 double k3;
00092 int pNorm;
00093 int *pEst;
00094 };
00095
00097
00098 class OkapiDocRep : public DocumentRep {
00099 public:
00100 OkapiDocRep(DOCID_T docID, const Index &dbIndex, OkapiParameter::TFParam ¶m) : DocumentRep(docID, dbIndex.docLength(docID)), ind(dbIndex),
00101 prm(param) {
00102 }
00103 virtual ~OkapiDocRep() { }
00104 virtual double termWeight(TERMID_T termID, const DocInfo *info) const;
00105 double BM25TF(double rawTF, double docLength) const;
00106 virtual double scoreConstant() const { return 0;}
00107 protected:
00108 const Index &ind;
00109 OkapiParameter::TFParam &prm;
00110 };
00111
00113
00114 class OkapiRetMethod : public TextQueryRetMethod {
00115 public:
00116
00117
00118 OkapiRetMethod(const Index &dbIndex, ScoreAccumulator &accumulator);
00119
00120 virtual ~OkapiRetMethod() { delete scFunc;}
00121
00122 virtual TextQueryRep *computeTextQueryRep(const TermQuery &qry) {
00123 return (new OkapiQueryRep(qry, ind, tfParam.k3));
00124 }
00125
00126 virtual DocumentRep *computeDocRep(DOCID_T docID) {
00127 return (new OkapiDocRep(docID, ind, tfParam));
00128 }
00129
00130 virtual ScoreFunction *scoreFunc();
00131
00133 virtual void updateTextQuery(TextQueryRep &origRep, const DocIDSet &relDocs);
00134
00135 void setTFParam(OkapiParameter::TFParam &tfWeightParam);
00136
00137 void setFeedbackParam(OkapiParameter::FeedbackParam &feedbackParam);
00138
00140 static double RSJWeight(double r, double R,
00141
00142 double n, double N) {
00143 return (log ((r+0.5)*(N-n-R+r+0.5)/((R-r+0.5)*(n-r+0.5))));
00144 }
00145
00146 protected:
00147 OkapiScoreFunc *scFunc;
00148
00149 OkapiParameter::TFParam tfParam;
00150 OkapiParameter::FeedbackParam fbParam;
00151
00152
00153 };
00154
00155
00156 inline void OkapiRetMethod::setTFParam(OkapiParameter::TFParam &tfWeightParam)
00157 {
00158 tfParam = tfWeightParam;
00159 }
00160
00161 inline void OkapiRetMethod::setFeedbackParam(OkapiParameter::FeedbackParam &feedbackParam)
00162 {
00163 fbParam = feedbackParam;
00164 }
00165
00166 inline double OkapiDocRep::BM25TF(double rawTF, double docLength) const
00167 {
00168 return ((prm.k1+1)*rawTF/(rawTF + prm.k1*(1-prm.b + prm.b*docLength/ind.docLengthAvg())));
00169 }
00170
00171
00172 inline double OkapiDocRep::termWeight(TERMID_T termID, const DocInfo *info) const
00173 {
00174 return BM25TF(info->termCount(), ind.docLength(info->docID()));
00175 }
00176
00177
00178
00179
00180 inline ScoreFunction *OkapiRetMethod::scoreFunc()
00181 {
00182 return scFunc;
00183 }
00184
00185 #endif
00186
00187
00188
00189
00190