Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

CORIRetMethod.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #ifndef _CORIRETMETHOD_HPP
00013 #define _CORIRETMETHOD_HPP
00014 
00015 
00016 #include "UnigramLM.hpp"
00017 #include "SimpleKLDocModel.hpp"
00018 #include "TextQueryRetMethod.hpp"
00019 #include "Param.hpp"
00020 #include <math.h>
00021 #include <iostream>
00022 
// Numeric constants for CORI scoring.
// Within this header, MINBELIEF is referenced only inside the commented-out
// normalisation code in CORIScoreFunc::adjustedScore; the other four macros are
// not referenced here at all.
// NOTE(review): CSTFBASELINE / CSTFFACTOR have the same values (50 / 150) as the
// default TFbase / TFfact arguments of the CORIDocRep constructor. Presumably
// the CS* pair is for collection-selection indexes and the DOC* pair for
// ordinary document indexes -- confirm against the implementation file.
00024 #define CSTFBASELINE 50
00025 #define CSTFFACTOR 150
00026 #define DOCTFBASELINE 0.5
00027 #define DOCTFFACTOR 1.5
00028 #define MINBELIEF 0.4
00029 
// Query representation for the CORI retrieval method, derived from
// ArrayQueryRep. Only the declaration is visible here; the constructor is
// defined outside this header.
00030 class CORIQueryRep : public ArrayQueryRep {
00031 public:
        // Builds the representation from a term query and an index.
        // qry     - the term query to represent.
        // dbIndex - the index the query is associated with.
00032   CORIQueryRep(const TermQuery & qry, const Index & dbIndex);
        // Virtual so that deleting through a base-class pointer is well defined;
        // nothing to release in this class.
00033   virtual ~CORIQueryRep() {}
00034 
00035 protected:
        // Reference to an index; presumably bound to dbIndex by the constructor
        // (TODO confirm in the implementation file). Being a reference member,
        // it makes the class non-assignable.
00036   const Index & ind;
00037 };
00038 
// Document representation for the CORI retrieval method, derived from
// DocumentRep. The constructor and termWeight() are defined outside this
// header.
00039 class CORIDocRep : public DocumentRep {
00040 public:
        // docID     - identifier of the document being represented.
        // dbIndex   - the index the document belongs to.
        // cwRatio   - pointer to double values supplied by the caller
        //             (CORIRetMethod passes its cwRatio array here).
        // TFfact    - TF factor, default 150 (same value as CSTFFACTOR).
        // TFbase    - TF baseline, default 50 (same value as CSTFBASELINE).
        // smoother  - optional document model; NULL by default.
        // collectLM - optional unigram language model; NULL by default.
        // NOTE(review): the parameter is `double * cwRatio` while the only
        // pointer member of matching name is `int * cwCounts` -- check how the
        // constructor stores or converts it.
00041   CORIDocRep(DOCID_T docID, const Index & dbIndex, double * cwRatio, 
00042              double TFfact = 150, double TFbase = 50, 
00043              const SimpleKLDocModel * smoother = NULL,
00044              const UnigramLM * collectLM = NULL);
        // Empty destructor: no member is released here, so the pointers below
        // are not freed by this class.
00045   virtual ~CORIDocRep() { }
        // Weight of term termID for this document; defined outside this header.
00046   virtual double termWeight(TERMID_T termID, const DocInfo * info) const ;
00047 
        // Always returns 0.
00048   virtual double scoreConstant() const { return 0; }
00049 
00050 private:
00051 
        // Reference to an index; presumably bound to dbIndex (TODO confirm).
00052   const Index & ind;
00053 
        // Pointer to int counts; not deleted by this class's destructor.
00054   int * cwCounts;
00055 
        // Presumably hold the constructor's `smoother` and `collectLM`
        // arguments; either may be NULL given the defaults (TODO confirm).
00056   const SimpleKLDocModel * dfSmooth;
00057   const UnigramLM * collLM;
00058 
        // Scalars set and used by the out-of-line code. NOTE(review):
        // CORIScoreFunc computes members of the same names as docCount + 0.5
        // and log(docCount + 1); confirm whether these follow the same formulas.
00059   double c05;
00060   double idiv;
00061   double tnorm;
00062 };
00063 
00064 
00065 
// Retrieval method implementing CORI on top of TextQueryRetMethod. It creates
// CORIQueryRep / CORIDocRep objects and hands out the score function stored
// in scFunc. The constructor and scoreCollection() are defined outside this
// header.
// NOTE(review): the destructor deletes scFunc and cwRatio, but no copy
// constructor or copy assignment is declared in this class. Unless the base
// class forbids copying, a copied object would delete the same pointers twice.
00066 class CORIRetMethod : public TextQueryRetMethod {
00067 public:
00068 
        // dbIndex     - index to retrieve from.
        // accumulator - score accumulator passed in by the caller.
        // cwName      - a name string; presumably identifies where the values
        //               behind cwRatio come from (TODO confirm).
        // isCSIndex   - integer flag, default 0; presumably marks a
        //               collection-selection index (TODO confirm).
        // smoothers   - optional array of document models, indexed by document
        //               id in computeDocRep(); NULL by default.
        // collectLM   - optional unigram language model; NULL by default.
00069   CORIRetMethod(const Index & dbIndex, ScoreAccumulator &accumulator, 
00070                    String cwName, int isCSIndex=0,
00071                 const SimpleKLDocModel ** smoothers = NULL, 
00072                 const UnigramLM * collectLM = NULL);
        // Releases the score function and the cwRatio array. dfSmooth and
        // collLM are not deleted here.
00073   ~CORIRetMethod() { delete scFunc; delete [] cwRatio; }
00074 
        // Returns a newly allocated CORIQueryRep for qry. `ind` is not declared
        // in this class; presumably it is inherited from the base class.
00075   virtual TextQueryRep * computeTextQueryRep(const TermQuery & qry) {
00076     return new CORIQueryRep(qry, ind);
00077   }
        // Returns a newly allocated CORIDocRep for docID. When a smoother array
        // is present, the entry for this document and the collection model are
        // passed along; otherwise CORIDocRep's NULL defaults apply.
        // NOTE(review): dfSmooth[docID] is indexed without a bounds check.
00078   virtual DocumentRep * computeDocRep(DOCID_T docID) { 
00079     if (dfSmooth != NULL) {
00080       return new CORIDocRep(docID, ind, cwRatio, tffactor, tfbaseline, dfSmooth[docID], collLM);
00081     }
00082     return new CORIDocRep(docID, ind, cwRatio, tffactor, tfbaseline);
00083   }
        // Returns the stored score function pointer; the pointee is deleted by
        // this object's destructor, so callers must not delete it.
00084   virtual ScoreFunction * scoreFunc() {
00085     return scFunc;
00086   }
00087 
        // Scores the collection for qry into results; defined outside this header.
00088   virtual void scoreCollection(const QueryRep &qry, IndexedRealVector &results);
00089 
        // Intentionally empty: the query representation is left unchanged.
00090   virtual void updateTextQuery(TextQueryRep &qryRep, const DocIDSet &relDocs) { }
00091   
        // Setters for the TF factor and TF baseline that computeDocRep()
        // forwards to each new CORIDocRep.
00092   void setTFFactor(double tf) { tffactor = tf; }
00093   void setTFBaseline(double tf) { tfbaseline = tf; }
00094 
00095 protected:
00096 
        // Score function returned by scoreFunc(); deleted in the destructor.
00097   ScoreFunction * scFunc;
        // Optional per-document models (may be NULL) and collection model;
        // neither is deleted by this class.
00098   const SimpleKLDocModel ** dfSmooth;
00099   const UnigramLM * collLM;
00100 
        // Array released with delete [] in the destructor; passed to every
        // CORIDocRep created by computeDocRep().
00101   double * cwRatio;
        // Values forwarded as TFfact / TFbase to CORIDocRep. Their initial
        // values are set outside this header (TODO confirm in the constructor).
00102   double tffactor;
00103   double tfbaseline;
00104   
00105 };
00106 
// Score function for the CORI retrieval method, derived from ScoreFunction.
// As written, adjustedScore() returns the incoming score unchanged; the
// normalisation by rmax is commented out.
00107 class CORIScoreFunc : public ScoreFunction {
00108 public:
        // Stores a reference to the index and precomputes two values from its
        // document count: c05 = docCount + 0.5 and idiv = log(docCount + 1).
        // NOTE(review): idiv is 0 when the index reports zero documents; this
        // only matters if the disabled normalisation below is re-enabled.
00109   CORIScoreFunc(const Index & index) : ind(index) {
00110     rmax=0;
00111     double dc = ind.docCount();
00112     c05 = dc + 0.5;
00113     idiv = log(dc + 1);
00114     //    qr=NULL;
00115     first=0;
00116   }
00117 
        // Returns origScore as is; qRep and dRep are not used by the active code.
        // The disabled block below would have recomputed rmax per query from
        // MINBELIEF, c05 and idiv and divided the score by it. It could not
        // compile inside this const method as written, since it assigns to rmax
        // and to the (also commented-out) qr member.
00118   virtual double adjustedScore(double origScore, const TextQueryRep * qRep,
00119                                const DocumentRep * dRep) const {
00120     /*
00121     if (qr != qRep) {
00122       qr = qRep;
00123       
00124       qRep->startIteration();
00125       rmax = 0;
00126       double qw = 0;
00127       while (qRep->hasMore()) {
00128         TERMID_T qtid = qRep->nextTerm()->id();
00129         rmax += (1-MINBELIEF)*(log(c05 / ind.docCount(qtid)) / idiv);
00130       }
00131     }
00132     if ((origScore/rmax)>=1){
00133       cout<<"!!!!!!!!!"<<endl;
00134       cout<<origScore<<" "<<rmax<<" "<<(origScore / rmax)<<endl;
00135       }*/
00136     //return (origScore / rmax);
00137     return origScore;
00138   }
00139 
00140 private:
        // Index whose document count feeds c05 and idiv.
00141   const Index & ind;
        // Set to 0 in the constructor; not read anywhere in this header.
00142   int first;
00143   //TextQueryRep * qr;
        // rmax, c05 and idiv are written in the constructor but read only by
        // the commented-out normalisation code above.
00144   double rmax;
00145   double c05;
00146   double idiv;
00147 };
00148 
00149 
00150 #endif /* _CORIRETMETHOD_HPP */

Generated on Wed Nov 3 12:58:53 2004 for Lemur Toolkit by doxygen 1.2.18