00001 /*========================================================================== 00002 * Copyright (c) 2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // 00014 // DocListIterator 00015 // 00016 // 9 January 2004 - tds 00017 // 00018 00019 #ifndef INDRI_DOCLISTITERATOR_HPP 00020 #define INDRI_DOCLISTITERATOR_HPP 00021 00022 #include "indri/DocPositionInfoList.hpp" 00023 #include "indri/DocListFileIterator.hpp" 00024 #include "indri/DocListInfo.hpp" 00025 00026 #define KEYFILE_DOCLISTITERATOR_DEFAULT_BUFFERSIZE (128*1024) 00027 #define KEYFILE_DOCLISTITERATOR_MAX_BUFFERSIZE (1024*1024) 00028 00029 namespace indri { 00030 namespace index { 00031 class DocListIterator : public DocPositionInfoList { 00032 private: 00033 mutable DocListFileIterator _fileIterator; // a file iterator that is iterating over all term/document postings 00034 int _termID; // termID of the term for which we're interested in reading document positions 00035 mutable bool _atNext; // if true, this means that the file iterator already points to the next document 00036 mutable bool _finished; // if true, then we have read all postings for this term already 00037 00038 public: 00039 DocListIterator( File& segment, 00040 int termID, 00041 File::offset_type seekLocation = 0, 00042 INT64 estimatedDataLength = KEYFILE_DOCLISTITERATOR_DEFAULT_BUFFERSIZE ); 00043 ~DocListIterator(); 00044 00045 // get the iterator ready to return data; call this before calling currentEntry or nextEntry 00046 void startIteration() const; 00047 00050 bool hasMore() const; 00051 00052 // move to the next document in the list; return null if there are no more valid documents 00053 DocInfo* nextEntry() const ; 00054 00055 // find the first document that contains this term that has an id >= documentID. 00056 // returns null if no such document exists. 00057 DocInfo* nextEntry( DOCID_T documentID ); 00058 00059 // return the current document entry if we're not finished, null otherwise. 00060 DocInfo* currentEntry(); 00061 00062 // iterator support functions 00063 virtual DocInfo* newElement() const { return new DocListInfo(); } 00064 virtual void assignElement(DocInfo* to, DocInfo* from) const { 00065 DocListInfo *tmp = dynamic_cast<DocListInfo *>(to); 00066 DocListInfo *cur = dynamic_cast<DocListInfo*>(from); 00067 *tmp = *cur; 00068 } 00069 00071 virtual DocInfo* getElement(DocInfo* elem, POS_T position) const; 00073 virtual POS_T beginPosition() const; 00075 virtual POS_T endPosition() const; 00077 virtual POS_T nextPosition(POS_T position) const; 00078 }; 00079 } 00080 } 00081 00082 #endif // INDRI_DOCLISTITERATOR_HPP 00083 00084