00001 /*========================================================================== 00002 * 00003 * Original source copyright (c) 2001, Carnegie Mellon University. 00004 * See copyright.cmu for details. 00005 * Modifications copyright (c) 2002, University of Massachusetts. 00006 * See copyright.umass for details. 00007 * 00008 *========================================================================== 00009 */ 00010 00011 #ifndef _FLATTEXTDOCMGR_HPP 00012 #define _FLATTEXTDOCMGR_HPP 00013 00014 #include "common_headers.hpp" 00015 #include "TextHandlerManager.hpp" 00016 #include "Exception.hpp" 00017 #include "DocumentManager.hpp" 00018 00019 #define FT_SUFFIX ".flat" 00020 #define FT_LOOKUP ".lookup" 00021 #define FT_FID ".fid" 00022 00023 class FlattextDocMgr : public DocumentManager, public TextHandler { 00024 public: 00025 00026 struct lookup_e { 00027 int fid; 00028 long offset; 00029 long bytes; 00030 }; 00031 00032 struct abc { 00033 bool operator() (char* s1, char* s2) const { 00034 return strcmp(s1, s2) < 0; 00035 } 00036 }; 00037 00042 FlattextDocMgr(string name, string mode, string source); 00043 00046 FlattextDocMgr(const string &name); 00047 00048 virtual ~FlattextDocMgr(); 00049 00051 virtual bool open(const string &manname); 00052 00053 virtual Parser* getParser() const { 00054 return TextHandlerManager::createParser(parseMode); 00055 } 00057 virtual const string &getMyID() const; 00058 00060 virtual char* getDoc(const string &docID) const; 00061 00062 virtual void buildMgr(); 00063 00064 char* handleDoc(char * docno); 00065 void handleEndDoc(); 00066 00067 protected: 00068 Parser* myparser; 00069 00070 private: 00072 bool readinSources(const string &fn); 00073 00075 void writeTOC(); 00076 00077 bool loadTOC(const string &fn); 00078 bool loadFTLookup(const string &fn); 00079 bool loadFTFiles(const string &fn, int num); 00080 00081 int numdocs; // how many docs we have 00082 string parseMode; // what type of parser we have 00083 long prevpos; // pos of previous doc beginning 00084 string IDname; // my name 00085 string IDnameext; // my name with type extension 00086 vector<string> sources; // list of all source files 00087 int fileid; // fileid of current file being processed 00088 ofstream writefpos; // stream for writing out file positions 00089 mutable map<string, lookup_e*, less<string> > table; 00090 lookup_e* entries; // array of lookup entries 00091 }; 00092 00093 #endif