Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

FieldListDiskBlockWriter.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 //
00014 // FieldListDiskBlockWriter.hpp
00015 //
00016 // 5 February 2004 -- tds
00017 //
00018 
00019 #ifndef INDRI_KEYFILEFIELDLISTDISKBLOCKWRITER_HPP
00020 #define INDRI_KEYFILEFIELDLISTDISKBLOCKWRITER_HPP
00021 
00022 #define INDRI_FIELDLIST_BLOCKSIZE (4096) // size in bytes of the FieldListDiskBlockWriter block buffer
00023 #define INDRI_EXTENT_REQUIRED_SPACE (5*3+10+4) // free bytes addExtent requires before it writes: doc id, begin, end, numeric, additional _extentCount room
00024 
00025 #include "RVLCompress.hpp"
00026 
00027 namespace indri {
00028   namespace index {
00029     class FieldListDiskBlockWriter {
00030     private:
00031       char _block[ INDRI_FIELDLIST_BLOCKSIZE ];
00032       char* _data;
00033       char* _extentCountSpot;
00034 
00035       unsigned int _lastEnd;
00036       unsigned int _lastDocument;
00037       unsigned int _extentCount;
00038 
00039       bool _numeric;
00040 
00041       const char* _beginMetadata() const {
00042         return _block + sizeof _block - 2*sizeof(UINT32);
00043       }
00044 
00045       void _terminateDocument() {
00046         if( _extentCount ) {
00047           int extentCountSize = RVLCompress::compressedSize( _extentCount );
00048 
00049           if( extentCountSize > 1 ) {
00050             ::memmove( _extentCountSpot + extentCountSize,
00051                       _extentCountSpot + 1,
00052                       _data - _extentCountSpot - 1 );
00053             _data += extentCountSize - 1;
00054           }
00055 
00056           char* endExtentCount = RVLCompress::compress_int( _extentCountSpot, _extentCount );
00057           assert( endExtentCount == _extentCountSpot + extentCountSize );
00058         }
00059 
00060         _extentCount = 0;
00061         _extentCountSpot = 0;
00062       }
00063 
00064     public:
00065       FieldListDiskBlockWriter( bool numeric = false ) {
00066         clear();
00067         _numeric = numeric;
00068       }
00069 
00070       void clear() {
00071         _lastEnd = 0;
00072         _lastDocument = 0;
00073         _extentCount = 0;
00074         _extentCountSpot = 0;
00075         _data = _block;
00076       }
00077 
00078       const char* data() const {
00079         return _block;
00080       }
00081 
00082       unsigned int dataSize() const {
00083         return sizeof _block;
00084       }
00085 
00086       int lastDocument() {
00087         return _lastDocument;
00088       }
00089 
00090       bool addExtent( unsigned int documentID, unsigned int begin, unsigned int end, UINT64 number = 0 ) {
00091         if( _beginMetadata() - _data < INDRI_EXTENT_REQUIRED_SPACE )
00092           return false;
00093 
00094         if( documentID != _lastDocument ) {
00095           _terminateDocument();
00096 
00097           _data = RVLCompress::compress_int( _data, documentID - _lastDocument );
00098           _lastDocument = documentID;
00099           _lastEnd = 0;
00100           _extentCountSpot = _data;
00101           _extentCount = 0;
00102           _data++;
00103         }
00104 
00105         _data = RVLCompress::compress_int( _data, begin - _lastEnd );
00106         _data = RVLCompress::compress_int( _data, end - begin );
00107 
00108         if( _numeric ) {
00109           _data = RVLCompress::compress_longlong( _data, number );
00110         }
00111 
00112         _extentCount++;
00113         _lastEnd = end;
00114 
00115         assert( begin <= end );
00116         assert( _data < _beginMetadata() );
00117         return true;
00118       }
00119 
00120       void close() {
00121         char* lastDocumentSpot = _block + INDRI_FIELDLIST_BLOCKSIZE - sizeof(UINT32);
00122         char* lastByteSpot = _block + INDRI_FIELDLIST_BLOCKSIZE - 2*sizeof(UINT32);
00123 
00124         _terminateDocument();
00125         *(UINT32*) lastDocumentSpot = _lastDocument;
00126         *(UINT32*) lastByteSpot = UINT32(_data - _block) | ( _numeric ? 0x80000000 : 0 );
00127       }
00128     };
00129   }
00130 }
00131 
00132 #endif // INDRI_KEYFILEFIELDLISTDISKBLOCKWRITER_HPP
00133 

Generated on Wed Nov 3 12:58:55 2004 for Lemur Toolkit by doxygen 1.2.18