Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

OkapiRetMethod.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 #ifndef _OKAPIRETMETHOD_HPP
00014 #define _OKAPIRETMETHOD_HPP
00015 
00016 #include "TextQueryRep.hpp"
00017 #include "ScoreFunction.hpp"
00018 #include "TextQueryRetMethod.hpp"
00019 #include <cmath>
00020 
00022 
/// Parameter bundles and default values for the Okapi retrieval method.
namespace OkapiParameter {
  /// Term-frequency weighting parameters.
  struct TFParam {
    /// k1 of the BM25 TF formula (read by OkapiDocRep::BM25TF).
    double k1;
    /// b of the BM25 TF formula; weights the docLength/average-length
    /// ratio against a constant 1 (read by OkapiDocRep::BM25TF).
    double b;
    /// k3 of the query-term weight (k3+1)*w/(k3+w)
    /// (read by OkapiQueryTerm::weight).
    double k3;
  };

  // The defaults are compile-time constants. They are declared const so that
  // no translation unit including this header can modify its private copy
  // and silently diverge from the others.
  static const double defaultK1 = 1.2;
  static const double defaultB = 0.75;
  static const double defaultK3 = 7;

  /// Feedback (query expansion) parameters.
  struct FeedbackParam {
    /// Expanded query term TF.
    double expQTF;
    /// Presumably the number of terms selected for query expansion;
    /// the consumer is not visible in this header -- TODO confirm.
    int howManyTerms;
  };

  static const double defaultExpQTF = 0.5;
  static const int defaultHowManyTerms = 50;
}  // namespace OkapiParameter (no trailing ';': a namespace is not a declaration needing one)
00044 
00046 
00047 class OkapiQueryTerm : public QueryTerm {
00048 public:
00049   OkapiQueryTerm(TERMID_T termID, double count, int pEstCount, double paramK3) : QueryTerm(termID, count), pEst(pEstCount), k3(paramK3) {
00050   }
00052   virtual int pEstCount() const { return pEst;}
00053 
00055   virtual double weight() const { 
00056     return ((k3+1)*w/(k3+w));
00057   } 
00058 private:
00059   int pEst;
00060   double k3;
00061 };
00062 
00064 
00065 class OkapiScoreFunc : public ScoreFunction {
00066 public:
00067   OkapiScoreFunc(const Index &dbIndex): ind(dbIndex) {}
00068   virtual double matchedTermWeight(const QueryTerm *qTerm, const TextQueryRep *qRep, const DocInfo *info, const DocumentRep *dRep) const;
00069 protected:
00070   const Index &ind;
00071 };
00072 
00073 
00075 class OkapiQueryRep : public ArrayQueryRep {
00076 public:
00077   // initial query constructor, no feedback docs assumed
00078   OkapiQueryRep(const TermQuery &qry, const Index &dbIndex, double paramK3);
00079 
00080   virtual ~OkapiQueryRep() { delete [] pEst; }
00082   int pNormCount() const { return pNorm;}
00084   void setPNormCount(int count) { pNorm = count;}
00086   void incPEst(int wdIndex, int val) { pEst[wdIndex]+=val;}
00087 protected:
00088   virtual QueryTerm *makeQueryTerm(TERMID_T wdIndex, double wdCount) const{
00089     return (new OkapiQueryTerm(wdIndex, wdCount, pEst[wdIndex], k3));
00090   }
00091   double k3;
00092   int pNorm;
00093   int *pEst;
00094 };
00095 
00097 
00098 class OkapiDocRep : public DocumentRep {
00099 public:
00100   OkapiDocRep(DOCID_T docID, const Index &dbIndex, OkapiParameter::TFParam &param) : DocumentRep(docID, dbIndex.docLength(docID)), ind(dbIndex),
00101   prm(param) {
00102   }
00103   virtual ~OkapiDocRep() { }
00104   virtual double termWeight(TERMID_T termID, const DocInfo *info) const;
00105   double BM25TF(double rawTF, double docLength) const;
00106   virtual double scoreConstant() const { return 0;}
00107 protected:
00108   const Index &ind;
00109   OkapiParameter::TFParam &prm;
00110 };
00111 
00113 
00114 class OkapiRetMethod : public TextQueryRetMethod  {
00115 public:
00116 
00117 
00118   OkapiRetMethod(const Index &dbIndex, ScoreAccumulator &accumulator);
00119 
00120   virtual ~OkapiRetMethod() { delete scFunc;}
00121 
00122   virtual TextQueryRep *computeTextQueryRep(const TermQuery &qry) {
00123     return (new OkapiQueryRep(qry, ind, tfParam.k3));
00124   }
00125 
00126   virtual DocumentRep *computeDocRep(DOCID_T docID) {
00127     return (new OkapiDocRep(docID, ind, tfParam));
00128   }
00129 
00130   virtual ScoreFunction *scoreFunc();
00131 
00133   virtual void updateTextQuery(TextQueryRep &origRep, const DocIDSet &relDocs);
00134 
00135   void setTFParam(OkapiParameter::TFParam &tfWeightParam);
00136 
00137   void setFeedbackParam(OkapiParameter::FeedbackParam &feedbackParam);
00138 
00140   static double RSJWeight(double r, double R, 
00141 
00142                           double n, double N) {
00143     return (log ((r+0.5)*(N-n-R+r+0.5)/((R-r+0.5)*(n-r+0.5))));
00144   }
00145 
00146 protected:
00147   OkapiScoreFunc *scFunc;
00148 
00149   OkapiParameter::TFParam tfParam;
00150   OkapiParameter::FeedbackParam fbParam;
00151 
00152 
00153 };
00154 
00155 
00156 inline void OkapiRetMethod::setTFParam(OkapiParameter::TFParam &tfWeightParam)
00157 {
00158   tfParam = tfWeightParam;
00159 }
00160 
00161 inline void OkapiRetMethod::setFeedbackParam(OkapiParameter::FeedbackParam &feedbackParam)
00162 {
00163   fbParam = feedbackParam;
00164 }
00165 
00166 inline double OkapiDocRep::BM25TF(double rawTF, double docLength) const 
00167 {
00168   return ((prm.k1+1)*rawTF/(rawTF +  prm.k1*(1-prm.b + prm.b*docLength/ind.docLengthAvg())));
00169 }
00170 
00171 
00172 inline double OkapiDocRep::termWeight(TERMID_T termID, const DocInfo *info) const
00173 {
00174   return BM25TF(info->termCount(), ind.docLength(info->docID()));
00175 } 
00176 
00177 
00178 
00179 
00180 inline ScoreFunction *OkapiRetMethod::scoreFunc()
00181 {
00182   return scFunc;
00183 }
00184 
00185 #endif /* _OKAPIRETMETHOD_HPP */
00186 
00187 
00188 
00189 
00190 

Generated on Wed Nov 3 12:59:00 2004 for Lemur Toolkit by doxygen 1.2.18