00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef INDRI_KEYFILEFIELDLISTDISKBLOCKWRITER_HPP
00020 #define INDRI_KEYFILEFIELDLISTDISKBLOCKWRITER_HPP
00021
00022 #define INDRI_FIELDLIST_BLOCKSIZE (4096)
00023 #define INDRI_EXTENT_REQUIRED_SPACE (5*3+10+4) // doc id, begin, end, numeric, additional _extentCount room
00024
00025 #include "RVLCompress.hpp"
00026
00027 namespace indri {
00028 namespace index {
00029 class FieldListDiskBlockWriter {
00030 private:
00031 char _block[ INDRI_FIELDLIST_BLOCKSIZE ];
00032 char* _data;
00033 char* _extentCountSpot;
00034
00035 unsigned int _lastEnd;
00036 unsigned int _lastDocument;
00037 unsigned int _extentCount;
00038
00039 bool _numeric;
00040
00041 const char* _beginMetadata() const {
00042 return _block + sizeof _block - 2*sizeof(UINT32);
00043 }
00044
00045 void _terminateDocument() {
00046 if( _extentCount ) {
00047 int extentCountSize = RVLCompress::compressedSize( _extentCount );
00048
00049 if( extentCountSize > 1 ) {
00050 ::memmove( _extentCountSpot + extentCountSize,
00051 _extentCountSpot + 1,
00052 _data - _extentCountSpot - 1 );
00053 _data += extentCountSize - 1;
00054 }
00055
00056 char* endExtentCount = RVLCompress::compress_int( _extentCountSpot, _extentCount );
00057 assert( endExtentCount == _extentCountSpot + extentCountSize );
00058 }
00059
00060 _extentCount = 0;
00061 _extentCountSpot = 0;
00062 }
00063
00064 public:
00065 FieldListDiskBlockWriter( bool numeric = false ) {
00066 clear();
00067 _numeric = numeric;
00068 }
00069
00070 void clear() {
00071 _lastEnd = 0;
00072 _lastDocument = 0;
00073 _extentCount = 0;
00074 _extentCountSpot = 0;
00075 _data = _block;
00076 }
00077
00078 const char* data() const {
00079 return _block;
00080 }
00081
00082 unsigned int dataSize() const {
00083 return sizeof _block;
00084 }
00085
00086 int lastDocument() {
00087 return _lastDocument;
00088 }
00089
00090 bool addExtent( unsigned int documentID, unsigned int begin, unsigned int end, UINT64 number = 0 ) {
00091 if( _beginMetadata() - _data < INDRI_EXTENT_REQUIRED_SPACE )
00092 return false;
00093
00094 if( documentID != _lastDocument ) {
00095 _terminateDocument();
00096
00097 _data = RVLCompress::compress_int( _data, documentID - _lastDocument );
00098 _lastDocument = documentID;
00099 _lastEnd = 0;
00100 _extentCountSpot = _data;
00101 _extentCount = 0;
00102 _data++;
00103 }
00104
00105 _data = RVLCompress::compress_int( _data, begin - _lastEnd );
00106 _data = RVLCompress::compress_int( _data, end - begin );
00107
00108 if( _numeric ) {
00109 _data = RVLCompress::compress_longlong( _data, number );
00110 }
00111
00112 _extentCount++;
00113 _lastEnd = end;
00114
00115 assert( begin <= end );
00116 assert( _data < _beginMetadata() );
00117 return true;
00118 }
00119
00120 void close() {
00121 char* lastDocumentSpot = _block + INDRI_FIELDLIST_BLOCKSIZE - sizeof(UINT32);
00122 char* lastByteSpot = _block + INDRI_FIELDLIST_BLOCKSIZE - 2*sizeof(UINT32);
00123
00124 _terminateDocument();
00125 *(UINT32*) lastDocumentSpot = _lastDocument;
00126 *(UINT32*) lastByteSpot = UINT32(_data - _block) | ( _numeric ? 0x80000000 : 0 );
00127 }
00128 };
00129 }
00130 }
00131
00132 #endif // INDRI_KEYFILEFIELDLISTDISKBLOCKWRITER_HPP
00133