00001 /*========================================================================== 00002 * Copyright (c) 2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // 00014 // DocListFileIterator 00015 // 00016 // 9 January 2004 - tds 00017 // 00018 00019 #ifndef INDRI_DOCLISTFILEITERATOR_HPP 00020 #define INDRI_DOCLISTFILEITERATOR_HPP 00021 00022 #include "File.hpp" 00023 #include "indri/DocListDiskBlockReader.hpp" 00024 #include "ReadBuffer.hpp" 00025 00026 namespace indri { 00027 namespace index { 00028 class DocListFileIterator { 00029 File* _file; 00030 DocListDiskBlockReader _reader; 00031 ReadBuffer _readBuffer; 00032 DocListInfo _info; 00033 int _segment; 00034 bool _finished; 00035 bool _partialDocument; 00036 File::offset_type _fileOffset; /* starting offset */ 00037 00038 bool _skipToNextBlock(); 00039 void _fetchDocument(); 00040 00041 public: 00042 DocListFileIterator( File* file, int segment, size_t bufferSize, File::offset_type fileOffset = 0 ); 00043 00044 bool finished() const; 00045 void startIteration(); 00046 00047 bool skip( int termID, int documentID ); 00048 DocListInfo& currentDocument(); 00049 bool nextDocument(); 00050 00051 int document() const; 00052 int termID() const; 00053 int segment() const; 00054 void remove(); 00055 }; 00056 } 00057 } 00058 00059 #endif // INDRI_DOCLISTITERATOR_HPP 00060