Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

RetParamManager.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #ifndef _RETRIEVALPARAMETER_HPP
00013 #define _RETRIEVALPARAMETER_HPP
00014 
00016 #include "Param.hpp"
00017 
00018 #include "TFIDFRetMethod.hpp"
00019 #include "OkapiRetMethod.hpp"
00020 #include "SimpleKLRetMethod.hpp"
00021 #include "CORIRetMethod.hpp"
00022 #include "CosSimRetMethod.hpp"
00023 #include "InQueryRetMethod.hpp"
00024 #include "IndriRetMethod.hpp"
00025 
00027 namespace RetrievalParameter {
00028 
00030 
00031 
00032   static String databaseIndex;
00034   static String retModel;
00036   static String textQuerySet;
00038   static String resultFile;
00040   static bool TRECresultFileFormat;
00042   static int fbDocCount;
00044   static int resultCount;
00046   static bool cacheDocReps;
00048   static bool useWorkingSet;
00050   static String workSetFile;
00051 
00053 
00054   static string getLower(char *parm, char *def) {
00055     string tmpString = ParamGetString(parm, def);
00056     // make it all lowercase
00057     for (int i = 0; i < tmpString.length(); i++)
00058       tmpString[i] = tolower(tmpString[i]);
00059     return tmpString;
00060   }
00061 
00062   static void get() {
00063     databaseIndex = ParamGetString("index","");
00064     retModel = getLower("retModel","kl");
00065     // backwards compatibility.
00066     if (retModel == "0") retModel = "tfidf";
00067     if (retModel == "1") retModel = "okapi";
00068     if (retModel == "2") retModel = "kl";
00069     if (retModel == "3") retModel = "inquery";
00070     if (retModel == "4") retModel = "cori_cs";
00071     if (retModel == "5") retModel = "cos";
00072     if (retModel == "6") retModel = "inq_struct";
00073     if (retModel == "7") retModel = "indri";
00074 
00075     string tmp = getLower("cacheDocReps", "true");
00076     cacheDocReps = (tmp == "true" || tmp == "1");
00077 
00078     tmp = getLower("useWorkingSet", "false"); 
00079     useWorkingSet = (tmp == "true" || tmp == "1");
00080     // working set file name
00081     workSetFile = ParamGetString("workingSetFile",""); 
00082     
00083     textQuerySet = ParamGetString("textQuery","");
00084     resultFile = ParamGetString("resultFile","");
00085 
00086     tmp = getLower("resultFormat","trec");
00087     TRECresultFileFormat = (tmp == "trec" || tmp == "1");
00088 
00089     // default being no feedback
00090     fbDocCount = ParamGetInt("feedbackDocCount",0); 
00091     resultCount = ParamGetInt("resultCount", 1000); 
00092     
00093   }
00094 };
00095 
00096 
00097 namespace TFIDFParameter {
00098 
00100 
00101   static WeightParam docTFPrm;
00102   static WeightParam qryTFPrm;
00103   static FeedbackParam fbPrm;
00105   
00106   static void get()
00107   {
00108     string tfmethod = RetrievalParameter::getLower("doc.tfMethod", "bm25");
00109     if (tfmethod == "rawtf") docTFPrm.tf = RAWTF;
00110     else if (tfmethod == "logf") docTFPrm.tf = LOGTF;    
00111     else if (tfmethod == "bm25") docTFPrm.tf = BM25;
00112 
00113     docTFPrm.bm25K1 = ParamGetDouble("doc.bm25K1",defaultDocK1);
00114     docTFPrm.bm25B = ParamGetDouble("doc.bm25B",defaultDocB);
00115 
00116     tfmethod = RetrievalParameter::getLower("query.tfMethod", "bm25");
00117     if (tfmethod == "rawtf") qryTFPrm.tf = RAWTF;
00118     else if (tfmethod == "logf") qryTFPrm.tf = LOGTF;    
00119     else if (tfmethod == "bm25") qryTFPrm.tf = BM25;
00120 
00121     qryTFPrm.bm25K1 = ParamGetDouble("query.bm25K1",defaultQryK1);
00122     qryTFPrm.bm25B = defaultQryB;
00123     
00124     fbPrm.howManyTerms = ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00125     fbPrm.posCoeff = ParamGetDouble("feedbackPosCoeff", defaultPosCoeff); 
00126   }
00127 };
00128 
00129 namespace OkapiParameter {
00130 
00132 
00133   static TFParam tfPrm;
00134   static FeedbackParam fbPrm;
00136 
00137 
00138   static void get()
00139   {
00140     tfPrm.k1 = ParamGetDouble("BM25K1",defaultK1);
00141     tfPrm.b =  ParamGetDouble("BM25B",defaultB);
00142     tfPrm.k3 = ParamGetDouble("BM25K3", defaultK3);
00143     fbPrm.expQTF = ParamGetDouble("BM25QTF", defaultExpQTF);
00144     fbPrm.howManyTerms = ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00145     
00146   }
00147 };
00148 
00149 namespace SimpleKLParameter {
00151 
00152   static SimpleKLParameter::DocSmoothParam docPrm;
00153   static SimpleKLParameter::QueryModelParam qryPrm;
00154   static String smoothSupportFile;
00156     
00157   static void get()
00158   {
00159     smoothSupportFile = ParamGetString("smoothSupportFile", "");
00160 
00161     string tmpString = RetrievalParameter::getLower("adjustedScoreMethod", 
00162                                                     "negativekld");
00163     if (tmpString == "querylikelihood" || tmpString == "ql") {
00164       qryPrm.adjScoreMethod = SimpleKLParameter::QUERYLIKELIHOOD;
00165     } else if (tmpString == "crossentropy" ||tmpString == "ce") {
00166       qryPrm.adjScoreMethod = SimpleKLParameter::CROSSENTROPY;
00167     } else if (tmpString == "negativekld" || tmpString == "-d") {
00168       qryPrm.adjScoreMethod = SimpleKLParameter::NEGATIVEKLD;
00169     } else {
00170       cerr << "Unknown scoreMethod " << tmpString << ". Using NEGATIVEKLD" 
00171            << endl;
00172       qryPrm.adjScoreMethod = SimpleKLParameter::NEGATIVEKLD;
00173     }
00174 
00175     tmpString = RetrievalParameter::getLower("smoothMethod", 
00176                                              "dirichletprior");
00177     if (tmpString == "jelinikmercer" || tmpString == "jm" || tmpString == "0")
00178       docPrm.smthMethod = SimpleKLParameter::JELINEKMERCER;
00179     else if (tmpString == "dirichletprior" || tmpString == "dir" || 
00180              tmpString == "1")
00181       docPrm.smthMethod = SimpleKLParameter::DIRICHLETPRIOR;
00182     else if (tmpString == "absolutediscount" || tmpString == "ad" || 
00183              tmpString == "2")
00184       docPrm.smthMethod = SimpleKLParameter::ABSOLUTEDISCOUNT;
00185     else if (tmpString == "twostage" || tmpString == "2s" || tmpString == "3")
00186       docPrm.smthMethod = SimpleKLParameter::TWOSTAGE;
00187     else {
00188       cerr << "Unknown smoothMethod " << tmpString << ". Using DIRICHLET" 
00189            << endl;
00190       docPrm.smthMethod = SimpleKLParameter::defaultSmoothMethod;
00191     }
00192     
00193 
00194     tmpString = RetrievalParameter::getLower("smoothStrategy", "interpolate");
00195     if (tmpString == "interpolate" || tmpString == "int" || tmpString == "0")
00196       docPrm.smthStrategy= SimpleKLParameter::INTERPOLATE;
00197     else if (tmpString == "backoff" || tmpString == "bo" || tmpString == "1")
00198       docPrm.smthStrategy= SimpleKLParameter::BACKOFF;
00199     else {
00200       cerr << "Unknown smoothStrategy " << tmpString << ". Using INTERPOLATE" 
00201            << endl;
00202       docPrm.smthStrategy= SimpleKLParameter::defaultSmoothStrategy;
00203     }
00204     
00205 
00206     docPrm.ADDelta = ParamGetDouble("discountDelta",defaultADDelta);
00207     docPrm.JMLambda = ParamGetDouble("JelinekMercerLambda",defaultJMLambda);
00208     docPrm.DirPrior = ParamGetDouble("DirichletPrior",defaultDirPrior);
00209     
00210     tmpString = RetrievalParameter::getLower("queryUpdateMethod", "mixture");
00211 
00212     if (tmpString == "mixture" || tmpString == "mix" || tmpString == "0")
00213       qryPrm.fbMethod = SimpleKLParameter::MIXTURE;
00214     else if (tmpString == "divmin" || tmpString == "div" || tmpString == "1")
00215       qryPrm.fbMethod = SimpleKLParameter::DIVMIN;
00216     else if (tmpString == "markovchain" || tmpString == "mc" || 
00217              tmpString == "2")
00218       qryPrm.fbMethod = SimpleKLParameter::MARKOVCHAIN;
00219     else if (tmpString == "relevancemodel1" || tmpString == "rm1" || 
00220              tmpString == "3")
00221       qryPrm.fbMethod = SimpleKLParameter::RM1;
00222     else if (tmpString == "relevancemodel2" || tmpString == "rm2" || 
00223              tmpString == "4")
00224       qryPrm.fbMethod = SimpleKLParameter::RM2;
00225     else {
00226       cerr << "Unknown queryUpdateMethod " << tmpString 
00227            << ". Using MIXTURE" 
00228            << endl;
00229       qryPrm.fbMethod = SimpleKLParameter::MIXTURE;
00230     }
00231     
00232 
00233     qryPrm.fbCoeff = ParamGetDouble("feedbackCoefficient", defaultFBCoeff);
00234     qryPrm.fbPrTh = ParamGetDouble("feedbackProbThresh", defaultFBPrTh);
00235     qryPrm.fbPrSumTh = ParamGetDouble("feedbackProbSumThresh",
00236                                       defaultFBPrSumTh);
00237     qryPrm.fbTermCount = ParamGetInt("feedbackTermCount", defaultFBTermCount);
00238     qryPrm.fbMixtureNoise = ParamGetDouble("feedbackMixtureNoise",
00239                                            defaultFBMixNoise);
00240     qryPrm.emIterations = ParamGetInt("emIterations", defaultEMIterations);
00241                                               
00242   }
00243 };
00244 
00245 namespace CORIParameter {
00246   static String collectionCounts;
00247   static double cstffactor;
00248   static double cstfbaseline;
00249   static double doctffactor;
00250   static double doctfbaseline;
00251   static void get() {
00252     collectionCounts = ParamGetString("collCounts", "USE_INDEX_COUNTS");
00253     cstffactor = ParamGetDouble("CSCTF_factor", 150);
00254     cstfbaseline = ParamGetDouble("CSCTF_baseline", 50);
00255     doctffactor = ParamGetDouble("DOCCTF_factor", 1.5);
00256     doctfbaseline = ParamGetDouble("DOCCTF_baseline", 0.5);
00257   }
00258 };
00259 
00260 namespace CosSimParameter {
00261 
00263 
00264   static FeedbackParam fbPrm;
00265   static String L2NormFile;
00267   
00268   static void get()
00269   {
00270     fbPrm.howManyTerms = ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00271     fbPrm.posCoeff = ParamGetDouble("feedbackPosCoeff", defaultPosCoeff); 
00272     L2NormFile = ParamGetString("L2File", defaultL2File);
00273   }
00274 };
00276 namespace InQueryParameter {
00279   static double fbCoeff = 0.5;
00281   static int fbTermCount = 50;
00283   static double defaultBelief = 0.4;
00285   static bool cacheIDF = false;
00286   static void get()
00287   {
00288     defaultBelief = ParamGetDouble("defaultBelief", defaultBelief);
00289     fbCoeff = ParamGetDouble("feedbackPosCoeff", fbCoeff);
00290     fbTermCount = ParamGetInt("feedbackTermCount", fbTermCount);
00291     string tmpString = RetrievalParameter::getLower("cacheIDF", "true");
00292     cacheIDF = (tmpString == "true" || tmpString == "1");
00293   }
00294 };
00295 
00297 namespace IndriParameter {
00300   static Parameters params;
00302   static string stopwords;
00304   static void get()
00305   {
00306     RetrievalParameter::get();
00307     // stopwords
00308     stopwords = ParamGetString("stopwords", "");
00309 
00310     // RM expansion parameters
00311     int fbTerms = ParamGetInt("feedbackTermCount", 10);
00312     double fbOrigWt = ParamGetDouble("fbOrigWt", 0.5);
00313     double fbMu = ParamGetDouble("fbMu", 0);
00314     params.set( "fbDocs" , RetrievalParameter::fbDocCount);
00315     params.set( "fbTerms" , fbTerms );
00316     params.set( "fbOrigWt", fbOrigWt);
00317     params.set( "fbMu", fbMu );
00318     // set retrieval rules -- NB limit to one.
00319     string rule = ParamGetString("rule", "method:dirichlet,mu:2500");
00320     params.set("rule", rule);
00321     // results count
00322     params.set( "count", RetrievalParameter::resultCount );
00323   }
00324 };
00325 
00326 #endif

Generated on Wed Nov 3 12:59:03 2004 for Lemur Toolkit by doxygen 1.2.18