Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

FlattextDocMgr.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  *
00003  *  Original source copyright (c) 2001, Carnegie Mellon University.
00004  *  See copyright.cmu for details.
00005  *  Modifications copyright (c) 2002, University of Massachusetts.
00006  *  See copyright.umass for details.
00007  *
00008  *==========================================================================
00009 */
00010 
00011 #ifndef _FLATTEXTDOCMGR_HPP
00012 #define _FLATTEXTDOCMGR_HPP
00013 
00014 #include "common_headers.hpp"
00015 #include "TextHandlerManager.hpp"
00016 #include "Exception.hpp"
00017 #include "DocumentManager.hpp"
00018 
00019 #define FT_SUFFIX ".flat"    // NOTE(review): presumably a file-name suffix for this manager's files; usage is not visible in this header - confirm
00020 #define FT_LOOKUP ".lookup"  // NOTE(review): presumably the suffix of the lookup file read by loadFTLookup - confirm
00021 #define FT_FID    ".fid"     // NOTE(review): presumably the suffix of the file-id list read by loadFTFiles - confirm
00022 
00023 class FlattextDocMgr : public DocumentManager, public TextHandler {
      // Document manager that is both a DocumentManager and a TextHandler.
      // Only declarations are visible in this header (apart from getParser and
      // the abc comparator); the notes below marked NOTE(review) are assumptions
      // drawn from names and should be confirmed against the implementation.
00024 public:
00025 
        // Record stored per entry of the lookup table.
        // NOTE(review): presumably fid = index of the source file, offset = byte
        // position of the document in that file, bytes = document length - confirm.
00026   struct lookup_e {
00027     int fid;
00028     long offset;
00029     long bytes;
00030   };
00031 
        // Strict-weak-ordering functor over C strings: s1 sorts before s2 when
        // strcmp(s1, s2) is negative. The parameters are pointers to const
        // because strcmp never writes through them; this lets const strings and
        // string literals be compared while still accepting plain char*.
        // Both pointers must be non-null, NUL-terminated strings.
00032   struct abc {
00033     bool operator() (const char* s1, const char* s2) const {
00034       return strcmp(s1, s2) < 0;
00035     }
00036   };
00037 
        // NOTE(review): presumably name = manager name, mode = parse mode (see
        // parseMode), source = file listing the source files - confirm.
00042   FlattextDocMgr(string name, string mode, string source);  
00043 
        // NOTE(review): presumably constructs a manager for an already-built
        // collection identified by name - confirm.
00046   FlattextDocMgr(const string &name);
00047 
00048   virtual ~FlattextDocMgr();
00049 
        // Returns a bool status for manname.
        // NOTE(review): presumably true on success - confirm.
00051   virtual bool open(const string &manname);
00052 
        // Returns whatever TextHandlerManager::createParser yields for this
        // manager's parseMode. Ownership of the returned pointer is not
        // established by this header.
00053   virtual Parser* getParser() const {
00054     return TextHandlerManager::createParser(parseMode);
00055   }
        // Returns a reference to this manager's identifier string.
00057   virtual const string &getMyID() const;
00058   
        // Looks up a document by its ID and returns it as a char buffer.
        // NOTE(review): who frees the returned buffer is not visible here - confirm.
00060   virtual char* getDoc(const string &docID) const;
00061 
00062   virtual void buildMgr();
00063 
        // NOTE(review): presumably TextHandler callbacks invoked at the start
        // and end of each parsed document - confirm against TextHandler.
00064   char* handleDoc(char * docno);
00065   void  handleEndDoc();
00066 
00067 protected:
00068   Parser* myparser;
00069 
00070 private:
00072   bool readinSources(const string &fn);
00073 
00075   void writeTOC();
00076 
        // Loaders taking a file name and returning a bool status.
00077   bool loadTOC(const string &fn);
00078   bool loadFTLookup(const string &fn);
00079   bool loadFTFiles(const string &fn, int num);
00080 
00081   int numdocs;              // how many docs we have
00082   string parseMode;           // what type of parser we have
00083   long prevpos;              // pos of previous doc beginning
00084   string IDname;            // my name
00085   string IDnameext;         // my name with type extension
00086   vector<string> sources;   // list of all source files
00087   int fileid;                       // fileid of current file being processed
00088   ofstream writefpos;       // stream for writing out file positions
        // Maps a string key to a lookup entry; mutable so that const members
        // (e.g. getDoc) may touch it.
        // NOTE(review): presumably keyed by document ID, with values pointing
        // into the entries array - confirm.
00089   mutable map<string, lookup_e*, less<string> > table; 
00090   lookup_e* entries;        // array of lookup entries
00091 };
00092 
00093 #endif

Generated on Wed Nov 3 12:58:55 2004 for Lemur Toolkit by doxygen 1.2.18