Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

DocListIterator.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 //
00014 // DocListIterator
00015 //
00016 // 9 January 2004 - tds
00017 //
00018 
00019 #ifndef INDRI_DOCLISTITERATOR_HPP
00020 #define INDRI_DOCLISTITERATOR_HPP
00021 
00022 #include "indri/DocPositionInfoList.hpp"
00023 #include "indri/DocListFileIterator.hpp"
00024 #include "indri/DocListInfo.hpp"
00025 
00025 #define KEYFILE_DOCLISTITERATOR_DEFAULT_BUFFERSIZE   (128*1024)  // 128 * 1024; used below as the default estimatedDataLength of the DocListIterator constructor
00026 #define KEYFILE_DOCLISTITERATOR_MAX_BUFFERSIZE      (1024*1024)  // 1024 * 1024; NOTE(review): not referenced in this header -- presumably an upper bound on the read buffer, confirm in the implementation
00028 
00029 namespace indri {
00030   namespace index {
00031     class DocListIterator : public DocPositionInfoList {  // DocPositionInfoList implementation that reads one term's postings through a DocListFileIterator
00032     private:
00033       mutable DocListFileIterator _fileIterator;  // a file iterator that is iterating over all term/document postings; mutable, so it can be modified from the const member functions below
00034       int _termID;  // termID of the term for which we're interested in reading document positions
00035       mutable bool _atNext; // if true, the file iterator already points to the next document; mutable for the same reason as _fileIterator
00036       mutable bool _finished;  // if true, then we have read all postings for this term already; mutable for the same reason as _fileIterator
00037 
00038     public:
00039       DocListIterator( File& segment,  // NOTE(review): presumably the file that holds the postings -- confirm in the implementation
00040                               int termID,  // NOTE(review): presumably stored in _termID -- confirm in the implementation
00041                               File::offset_type seekLocation = 0,  // defaults to 0; NOTE(review): presumably the offset in segment where reading starts -- confirm
00042                               INT64 estimatedDataLength = KEYFILE_DOCLISTITERATOR_DEFAULT_BUFFERSIZE );  // defaults to 128*1024; NOTE(review): presumably a buffer size hint -- confirm
00043       ~DocListIterator();  // NOTE(review): not declared virtual here; confirm that DocPositionInfoList declares a virtual destructor
00044 
00045       // get the iterator ready to return data; call this before calling currentEntry or nextEntry
00046       void startIteration() const;
00047 
00050       bool hasMore() const;  // NOTE(review): presumably true while postings for this term remain -- confirm against the implementation
00051 
00052       // move to the next document in the list; return null if there are no more valid documents
00053       DocInfo* nextEntry() const ;
00054 
00055       // find the first document that contains this term that has an id >= documentID.
00056       // returns null if no such document exists.
00057       // unlike the no-argument overload above, this overload is not const.
00058       DocInfo* nextEntry( DOCID_T documentID );
00059 
00060       // return the current document entry if we're not finished, null otherwise.
00061       DocInfo* currentEntry();
00062       // iterator support functions
00063       virtual DocInfo* newElement() const { return new DocListInfo(); }  // allocates a DocListInfo with new; no matching delete is visible here, so the caller presumably owns the result -- confirm
00064       virtual void assignElement(DocInfo* to, DocInfo* from) const {   // copies *from into *to, treating both as DocListInfo
00065         DocListInfo *tmp = dynamic_cast<DocListInfo *>(to);  // yields null if 'to' is not a DocListInfo
00066         DocListInfo *cur = dynamic_cast<DocListInfo*>(from);  // yields null if 'from' is not a DocListInfo
00067         *tmp = *cur;  // neither cast result is checked: both arguments must point to DocListInfo objects, otherwise this dereferences null
00068       }
00069 
00071       virtual DocInfo* getElement(DocInfo* elem, POS_T position) const;  // declared only; NOTE(review): presumably fills/returns the entry at 'position' -- confirm in the implementation
00073       virtual POS_T beginPosition() const;  // declared only; NOTE(review): presumably the position of the first entry -- confirm
00075       virtual POS_T endPosition() const;  // declared only; NOTE(review): presumably the past-the-end position -- confirm
00077       virtual POS_T nextPosition(POS_T position) const;  // declared only; NOTE(review): presumably the position following 'position' -- confirm
00078     };
00079   }
00080 }
00081 
00082 #endif // INDRI_DOCLISTITERATOR_HPP
00083 
00084 

Generated on Wed Nov 3 12:58:54 2004 for Lemur Toolkit by doxygen 1.2.18