00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef INDRI_FIELDLISTITERATOR_HPP
00020 #define INDRI_FIELDLISTITERATOR_HPP
00021
00022 #include "indri/Extent.hpp"
00023 #include "indri/FieldListDiskBlockReader.hpp"
00024 #include "File.hpp"
00025 #include "ReadBuffer.hpp"
00026
00027 #define INDRI_FIELDLISTITERATOR_BUFFERSIZE (1024*1024)
00028
00029 namespace indri {
00030 namespace index {
00031
00032 struct FieldExtentInfo {
00033 int documentID;
00034 greedy_vector<Extent> extents;
00035 greedy_vector<UINT64> numbers;
00036 };
00037
00038 class FieldListIterator {
00039 private:
00040 FieldListDiskBlockReader _reader;
00041 FieldExtentInfo _info;
00042 bool _finished;
00043 bool _hasMore;
00044 bool _numeric;
00045
00046 File& _file;
00047 ReadBuffer _readBuffer;
00048
00049 bool _fetchNextBlock() {
00050 const char* nextBlock = _readBuffer.read( INDRI_FIELDLIST_BLOCKSIZE );
00051
00052 if( !nextBlock )
00053 return false;
00054
00055 _reader.setBlock( nextBlock );
00056 _reader.next();
00057 return true;
00058 }
00059
00060 void _fetchDocument( int documentID, bool& endBlock ) {
00061 if( _numeric ) {
00062 _fetchDocumentNumeric( documentID, endBlock );
00063 } else {
00064 _fetchDocumentNonNumeric( documentID, endBlock );
00065 }
00066 }
00067
00068 void _fetchDocumentNonNumeric( int documentID, bool& endBlock ) {
00069 endBlock = false;
00070
00071 if( documentID != _reader.document() )
00072 return;
00073
00074 _info.extents.push_back( _reader.extent() );
00075
00076 while( _reader.next() ) {
00077 if( _reader.document() != documentID )
00078 return;
00079
00080 _info.extents.push_back( _reader.extent() );
00081 }
00082
00083 endBlock = true;
00084 }
00085
00086 void _fetchDocumentNumeric( int documentID, bool& endBlock ) {
00087 endBlock = false;
00088
00089 if( documentID != _reader.document() )
00090 return;
00091
00092 _info.extents.push_back( _reader.extent() );
00093 _info.numbers.push_back( _reader.number() );
00094
00095 while( _reader.next() ) {
00096 if( _reader.document() != documentID )
00097 return;
00098
00099 _info.extents.push_back( _reader.extent() );
00100 _info.numbers.push_back( _reader.number() );
00101 }
00102
00103 endBlock = true;
00104 }
00105
00106 public:
00107 FieldListIterator( File& fieldListFile, bool numeric ) :
00108 _file( fieldListFile ),
00109 _readBuffer( fieldListFile, INDRI_FIELDLISTITERATOR_BUFFERSIZE, false ),
00110 _numeric(numeric)
00111 {
00112 }
00113
00114 void startIteration() {
00115 _hasMore = true;
00116 _readBuffer.seekg( 0, std::fstream::beg );
00117 _info.documentID = -1;
00118 _finished = !_fetchNextBlock();
00119 }
00120
00121 bool hasMore() {
00122 return _hasMore;
00123 }
00124
00125 const FieldExtentInfo* currentEntry() {
00126 if( !_finished )
00127 return &_info;
00128
00129 return 0;
00130 }
00131
00132 const FieldExtentInfo* nextEntry() {
00133 if( _finished )
00134 return 0;
00135
00136 if( !_hasMore ) {
00137 _finished = true;
00138 return 0;
00139 }
00140
00141
00142 int documentID = _reader.document();
00143 _info.documentID = documentID;
00144 _info.extents.clear();
00145 _info.numbers.clear();
00146 bool atEndOfBlock;
00147
00148 _fetchDocument( documentID, atEndOfBlock );
00149
00150 while( atEndOfBlock ) {
00151 if( ! _fetchNextBlock() ) {
00152 _hasMore = false;
00153 return &_info;
00154 }
00155
00156 _fetchDocument( documentID, atEndOfBlock );
00157 }
00158
00159 return &_info;
00160 }
00161
00162 const FieldExtentInfo* nextEntry( int documentID ) {
00163 if( _finished )
00164 return 0;
00165
00166 if( documentID <= _info.documentID )
00167 return &_info;
00168
00169 if( !_hasMore ) {
00170 _finished = true;
00171 return 0;
00172 }
00173
00174
00175
00176 while( _reader.lastDocument() < documentID ) {
00177 if( ! _fetchNextBlock() ) {
00178 _finished = true;
00179 _hasMore = false;
00180 return 0;
00181 }
00182 }
00183
00184 const FieldExtentInfo* info;
00185
00186
00187 for( info = nextEntry(); info && info->documentID < documentID; info = nextEntry() )
00188 ;
00189
00190 return info;
00191 }
00192 };
00193 }
00194 }
00195
00196 #endif // INDRI_FIELDLISTITERATOR_HPP