00001 00002 #include "TextHandler.hpp" 00003 #include "InvPushIndex.hpp" 00004 #include "Parser.hpp" 00005 #include "WordSet.hpp" 00006 00007 #include <stdio.h> 00008 00009 #ifndef _DOCFREQINDEXER_HPP 00010 #define _DOCFREQINDEXER_HPP 00011 00012 00013 class DocFreqIndexer : public TextHandler { 00014 00015 public: 00016 DocFreqIndexer(const string &csName, const string &cwName, 00017 const string &ssName, int bufferSize, 00018 bool countStopWords = false); 00019 ~DocFreqIndexer(); 00020 00021 char * handleDoc(char * docno); 00022 char * handleWord(char * word); 00023 void handleEndDoc(); 00024 00025 void newDb(const string &name); 00026 00027 00028 00029 private: 00030 00031 int cw; 00032 int dfCount; 00033 bool first; 00034 00035 InvPushIndex * collsel; 00036 00037 DocumentProps * csdp; 00038 InvFPTerm * term; 00039 00040 WordSet docWords; 00041 00042 FILE * collWords; 00043 FILE * serverSizes; 00044 int numDocs; 00045 00046 bool countStopWds; 00047 00048 }; 00049 00050 #endif