Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

QryBasedSampler.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #ifndef _QRYBASEDSAMPLER_HPP
00013 #define _QRYBASEDSAMPLER_HPP
00014 
00015 
00016 
00017 #include "FreqCounter.hpp"
00018 #include "DBManager.hpp"
00019 
00021 //typedef stringset docidset;
      /// Set of document ids. Elements are ordered with less<string>, so docid_t
      /// is presumably a string type (it is declared outside this file - confirm).
      /// QryBasedSampler uses this type for its seenDocs member.
00022 typedef set<docid_t, less<string> > docidset;
00023 
00025 #define T_NDOCS 1   /* termination mode under which QryBasedSampler's numDocs limit is used */
00026 
00027 #define T_NWORDS 2  /* termination mode under which QryBasedSampler's numWords limit is used */
00028 
00029 #define T_NQRYS 4   /* termination mode under which QryBasedSampler's numQueries limit is used */
00030 
00034 class QryBasedSampler {
        /* Samples a database by probing it with queries: it holds a DBManager
         * for querying, a FreqCounter for building a description of the
         * database, and the limits that decide when a probe terminates.
         * Only declarations are visible in this header; the behavior notes
         * below that go beyond the signatures are marked as assumptions. */
00035 public:
        /// Construct a sampler. Initial values of the settings are chosen by the
        /// implementation, which is not part of this header.
00036   QryBasedSampler();
        /// Destroy the sampler. Whether the DBManager / FreqCounter pointers are
        /// released here is not shown in this header - confirm before relying on it.
00037   ~QryBasedSampler();
00038 
        /// Probe the database, starting from the query initQuery.
        /// Returns a bool; presumably true on success - confirm in the implementation.
00040   bool probe(const char * initQuery);
00041   
        /// Set the database manager used for querying a database.
        /// The pointer is held as pointer-to-const; ownership is not established here.
00043   void setDBManager(const DBManager * database);
00044 
        /// Get the database manager (read-only access).
00046   const DBManager * getDBManager() const;
00047 
00048 
        /// Set the frequency counter used for building a description of a database.
        /// Taken as a non-const pointer; ownership is not established here.
00051   void setFreqCounter(FreqCounter * counter);
00052 
        /// Get the frequency counter (read-only access).
00054   const FreqCounter * getFreqCounter() const;
00055 
00056 
        /// Set the output prefix for filenames.
        /// Which files are written with this prefix is decided by the
        /// implementation, which is not visible in this header.
00060   void setOutputPrefix(const string &prefix);
00061   
        /// Get the output prefix for filenames.
00063   const string &getOutputPrefix() const;
00064 
        /// Set the number of unique documents to retrieve (used with T_NDOCS).
00066   void setNumDocs(int n);
00067 
        /// Get the number of unique documents to retrieve.
00069   int getNumDocs() const;
00070 
00071 
        /// Set the number of unique words to retrieve (used with T_NWORDS).
00073   void setNumWords(int n);
00074 
        /// Get the number of unique words to retrieve.
00076   int getNumWords() const;
00077 
00078 
        /// Set the number of queries to run (used with T_NQRYS).
00080   void setNumQueries(int n);
00081 
        /// Get the number of queries to run.
00083   int getNumQueries() const;
00084 
00085 
        /// Set the termination mode of the probe. m is expected to be built
        /// from the T_NDOCS, T_NWORDS and T_NQRYS constants.
        /// NOTE(review): the constants are distinct bits (1, 2, 4), which
        /// suggests they may be combinable with |, but nothing in this
        /// header confirms that - check the implementation of probe().
00092   void setTermMode(int m);
00093 
        /// Get the termination mode of the probe.
00095   int getTermMode() const;
00096   
00097 
        /// Set the number of documents per query to use.
00099   void setDocsPerQuery(int n);
00100   
        /// Get the number of documents per query to use.
00102   int getDocsPerQuery() const;
00103 
00104 
00105 private:
00106 
00107   /* for querying a db */
00108   const DBManager * db;
00109 
00110 
00111   /* for building a description of a db */
00112   FreqCounter * freqCounter;
00113 
00114 
00115   /* output prefix for filenames */
00116   string outputPrefix;
00117 
00118 
00119   /* termination mode of the probe -
00120    * T_NDOCS, T_NWORDS or T_NQRYS */
00121   int termMode;
00122   
00123   /* number of unique docs to retrieve - only used if
00124    * termMode == T_NDOCS */
00125   int numDocs;
00126 
00127   /* number of unique words to retrieve - only used if
00128    * termMode == T_NWORDS */
00129   int numWords;
00130 
00131   /* number of queries to run - only used if
00132    * termMode == T_NQRYS */
00133   int numQueries;
00134 
00135   /* documents per query to use */
00136   int docsPerQuery;
00137 
00138   /* stores the ids of the documents already retrieved
00139    * from the system.  used to prevent parsing
00140    * a document multiple times */
00141   docidset seenDocs;
00142 };
00143 
00144 #endif

Generated on Wed Nov 3 12:59:02 2004 for Lemur Toolkit by doxygen 1.2.18