Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

BasicDocStream.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 #ifndef _BASICFILESTREAM_HPP
00014 #define _BASICFILESTREAM_HPP
00015 
00017 
00055 #include "common_headers.hpp"
00056 #include <cassert>
00057 #include <cstdio>
00058 #include <cstring>
00059 #include "DocStream.hpp"
00060 #include "Exception.hpp"
00061 
00062 
// NOTE(review): MAXLINE is not referenced in the visible part of this header;
// presumably a line-buffer size used by the implementation file -- confirm
// before changing or removing it.
00063 #define MAXLINE 65536
00064 
00065 
00067 
00068 class BasicTokenDoc : public Document {
00069  public:
00070   BasicTokenDoc() {
00071   }
00072   BasicTokenDoc(ifstream *stream): docStr(stream) {
00073   }
00074    void startTermIteration() const;  
00075   
00076   const char *getID() const { return id;}
00077 
00078   bool hasMore() const{ return (strcmp(curWord, "</DOC>") != 0);}
00079     
00080   const Term * nextTerm() const;
00081 
00082   void skipToEnd() const;
00083   friend class BasicDocStream;
00084  private:
00085   void readID(); 
00086   mutable char *curWord;
00087   mutable char buf1[20000];
00088   mutable char buf2[20000];
00089   char id[2000];
00090   ifstream *docStr;
00091   streampos startPos; // starting position of the terms in the file
00092   //replace  static BasicTokenTerm t; with attribute
00093   mutable Term t;
00094 };
00095 
00096 
00098 class BasicDocStream : public DocStream
00099 {
00100 public:
00101   BasicDocStream() {}
00102   BasicDocStream (const string &inputFile);
00103 
00104   virtual ~BasicDocStream() {  delete ifs;}
00105 
00106 public:
00107         
00108   bool hasMore(); 
00109 
00110   void startDocIteration();
00111 
00112   Document *nextDoc();
00113 
00114 private:
00115   char file[1024];
00116   ifstream *ifs;
00117   char buf[2000];
00118   bool nextTokenRead;
00119   // replace static BasicTokenDoc doc;  with attribute
00120   BasicTokenDoc doc;
00121 };
00122 
00123 
00124 
00125 
00126 #endif
00127 
00128 
00129 
00130 

Generated on Wed Nov 3 12:58:51 2004 for Lemur Toolkit by doxygen 1.2.18