Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

DocFreqIndexer.hpp

Go to the documentation of this file.
00001 
00002 #include "TextHandler.hpp"
00003 #include "InvPushIndex.hpp"
00004 #include "Parser.hpp"
00005 #include "WordSet.hpp"
00006 
00007 #include <stdio.h>
00008 
00009 #ifndef _DOCFREQINDEXER_HPP
00010 #define _DOCFREQINDEXER_HPP
00011 
00012 
00013 class DocFreqIndexer : public TextHandler {  // TextHandler subclass; NOTE(review): name suggests it indexes document frequencies -- confirm in the .cpp
00014   // Only declarations appear in this class body; no member function is defined inline here.
00015 public:
00016   DocFreqIndexer(const string &csName, const string &cwName,   // three name strings; NOTE(review): presumably tied to collsel / collWords / serverSizes below -- confirm
00017                  const string &ssName, int bufferSize,   // bufferSize: an int whose unit and use are not visible in this header
00018                  bool countStopWords = false);  // countStopWords is false when the caller omits it
00019   ~DocFreqIndexer();
00020   // Handlers taking raw char* arguments; what the returned char* refers to is not visible here.
00021   char * handleDoc(char * docno);
00022   char * handleWord(char * word);
00023   void handleEndDoc();
00024   // Takes a name string; NOTE(review): presumably switches to a new database -- confirm in the .cpp.
00025   void newDb(const string &name);
00026 
00027 
00028 
00029 private:
00030   // NOTE(review): the roles noted below are inferred from names and types only; verify against the implementation.
00031   int cw;  // NOTE(review): possibly a collection-word counter -- confirm
00032   int dfCount;  // NOTE(review): presumably a document-frequency tally -- confirm
00033   bool first;  // NOTE(review): presumably a "first item seen" flag -- confirm
00034   // Raw pointer to an InvPushIndex (presumably from InvPushIndex.hpp); ownership is not visible here.
00035   InvPushIndex * collsel;
00036   // Raw pointers to project types; lifetime and ownership are not visible here.
00037   DocumentProps * csdp;
00038   InvFPTerm * term;
00039   // WordSet held by value; NOTE(review): presumably the words of the current document -- confirm.
00040   WordSet docWords;
00041   // C stdio stream handles; NOTE(review): presumably opened from cwName / ssName -- confirm.
00042   FILE * collWords;
00043   FILE * serverSizes;
00044   int numDocs;  // NOTE(review): presumably a count of documents handled -- confirm
00045   // NOTE(review): presumably stores the constructor's countStopWords argument -- confirm.
00046   bool countStopWds;
00047 
00048 };
00049 
00050 #endif

Generated on Wed Nov 3 12:58:53 2004 for Lemur Toolkit by doxygen 1.2.18