00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _CORIRETMETHOD_HPP
00013 #define _CORIRETMETHOD_HPP
00014
00015
00016 #include "UnigramLM.hpp"
00017 #include "SimpleKLDocModel.hpp"
00018 #include "TextQueryRetMethod.hpp"
00019 #include "Param.hpp"
00020 #include <math.h>
00021 #include <iostream>
00022
// Tuning constants for the CORI retrieval method.
// NOTE(review): judging by the names, the CS* pair is the term-frequency
// baseline/factor used for collection-selection indexes and the DOC* pair
// the one used for ordinary document indexes -- confirm against the .cpp.
#define CSTFBASELINE 50
#define CSTFFACTOR 150
#define DOCTFBASELINE 0.5
#define DOCTFFACTOR 1.5
// NOTE(review): presumably the lower bound applied to a term's belief
// score -- confirm against the scoring code.
#define MINBELIEF 0.4
00029
00030 class CORIQueryRep : public ArrayQueryRep {
00031 public:
00032 CORIQueryRep(const TermQuery & qry, const Index & dbIndex);
00033 virtual ~CORIQueryRep() {}
00034
00035 protected:
00036 const Index & ind;
00037 };
00038
00039 class CORIDocRep : public DocumentRep {
00040 public:
00041 CORIDocRep(DOCID_T docID, const Index & dbIndex, double * cwRatio,
00042 double TFfact = 150, double TFbase = 50,
00043 const SimpleKLDocModel * smoother = NULL,
00044 const UnigramLM * collectLM = NULL);
00045 virtual ~CORIDocRep() { }
00046 virtual double termWeight(TERMID_T termID, const DocInfo * info) const ;
00047
00048 virtual double scoreConstant() const { return 0; }
00049
00050 private:
00051
00052 const Index & ind;
00053
00054 int * cwCounts;
00055
00056 const SimpleKLDocModel * dfSmooth;
00057 const UnigramLM * collLM;
00058
00059 double c05;
00060 double idiv;
00061 double tnorm;
00062 };
00063
00064
00065
00066 class CORIRetMethod : public TextQueryRetMethod {
00067 public:
00068
00069 CORIRetMethod(const Index & dbIndex, ScoreAccumulator &accumulator,
00070 String cwName, int isCSIndex=0,
00071 const SimpleKLDocModel ** smoothers = NULL,
00072 const UnigramLM * collectLM = NULL);
00073 ~CORIRetMethod() { delete scFunc; delete [] cwRatio; }
00074
00075 virtual TextQueryRep * computeTextQueryRep(const TermQuery & qry) {
00076 return new CORIQueryRep(qry, ind);
00077 }
00078 virtual DocumentRep * computeDocRep(DOCID_T docID) {
00079 if (dfSmooth != NULL) {
00080 return new CORIDocRep(docID, ind, cwRatio, tffactor, tfbaseline, dfSmooth[docID], collLM);
00081 }
00082 return new CORIDocRep(docID, ind, cwRatio, tffactor, tfbaseline);
00083 }
00084 virtual ScoreFunction * scoreFunc() {
00085 return scFunc;
00086 }
00087
00088 virtual void scoreCollection(const QueryRep &qry, IndexedRealVector &results);
00089
00090 virtual void updateTextQuery(TextQueryRep &qryRep, const DocIDSet &relDocs) { }
00091
00092 void setTFFactor(double tf) { tffactor = tf; }
00093 void setTFBaseline(double tf) { tfbaseline = tf; }
00094
00095 protected:
00096
00097 ScoreFunction * scFunc;
00098 const SimpleKLDocModel ** dfSmooth;
00099 const UnigramLM * collLM;
00100
00101 double * cwRatio;
00102 double tffactor;
00103 double tfbaseline;
00104
00105 };
00106
00107 class CORIScoreFunc : public ScoreFunction {
00108 public:
00109 CORIScoreFunc(const Index & index) : ind(index) {
00110 rmax=0;
00111 double dc = ind.docCount();
00112 c05 = dc + 0.5;
00113 idiv = log(dc + 1);
00114
00115 first=0;
00116 }
00117
00118 virtual double adjustedScore(double origScore, const TextQueryRep * qRep,
00119 const DocumentRep * dRep) const {
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137 return origScore;
00138 }
00139
00140 private:
00141 const Index & ind;
00142 int first;
00143
00144 double rmax;
00145 double c05;
00146 double idiv;
00147 };
00148
00149
00150 #endif