Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

TermListBuilder.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 //
00014 // indri::index::TermListBuilder
00015 //
00016 // 14 July 2004 -- tds 
00017 //
00018 
00019 #ifndef INDRI_TERMLISTBUILDER_HPP
00020 #define INDRI_TERMLISTBUILDER_HPP
00021 
00022 #include "indri/FieldExtent.hpp"
00023 #include "indri/Buffer.hpp"
00024 #include "RVLCompress.hpp"
00025 #include "indri/RVLDecompressStream.hpp"
00026 #include "indri/FieldExtent.hpp"
00027 #include "indri/greedy_vector"
00028 
00029 namespace indri {
00030   namespace index {
00031     class TermListBuilder {
00032     private:
00033       greedy_vector<int> _terms;
00034       greedy_vector<FieldExtent> _fields;
00035 
00036     public:
00037       void clear() {
00038         _terms.clear();
00039         _fields.clear();
00040       }
00041 
00042       void addField( const indri::index::FieldExtent& field ) {
00043         _fields.push_back( field );
00044       }
00045 
00046       void addTerm( const int termID ) {
00047         _terms.push_back( termID );
00048       }
00049 
00050       greedy_vector<int>& terms() {
00051         return _terms;
00052       }
00053 
00054       const greedy_vector<int>& terms() const {
00055         return _terms;
00056       }
00057 
00058       greedy_vector<indri::index::FieldExtent>& fields() {
00059         return _fields;
00060       }
00061 
00062       const greedy_vector<indri::index::FieldExtent>& fields() const {
00063         return _fields;
00064       }
00065 
00066       void read( const char* buffer, int size ) {
00067         clear();
00068         RVLDecompressStream stream( buffer, size );
00069 
00070         int termCount;
00071         int fieldCount;
00072 
00073         stream >> termCount
00074               >> fieldCount;
00075 
00076         for( int i=0; i<termCount; i++ ) {
00077           int termID;
00078           stream >> termID;
00079           _terms.push_back( termID ); 
00080         }
00081 
00082         for( int i=0; i<fieldCount; i++ ) {
00083           FieldExtent extent;
00084 
00085           stream >> extent.id
00086                 >> extent.begin
00087                 >> extent.end
00088                 >> extent.number;
00089         }
00090       }
00091 
00092       void write( Buffer& buffer ) {
00093         // format:
00094         //   term count
00095         //   field count
00096         //   termID * termCount (compressed)
00097         //   ( fieldID, begin, (delta begin) end, number ) * fieldCount
00098 
00099         buffer.clear();
00100         int length = 10 + 5 * _terms.size() + 2 * sizeof(FieldExtent) * _fields.size();
00101         char* begin = buffer.write( length );
00102         char* out = begin;
00103 
00104         // write count of terms and fields in the document first
00105         out = RVLCompress::compress_int( out, _terms.size() );
00106         out = RVLCompress::compress_int( out, _fields.size() );
00107 
00108         // write out terms
00109         int termsSize = RVLCompress::compress_ints( &_terms.front(), (unsigned char*) out, _terms.size() );
00110         out += termsSize;
00111 
00112         // write out fields
00113         for( unsigned int i=0; i<_fields.size(); i++ ) {
00114           out = RVLCompress::compress_int( out, _fields[i].id );
00115           out = RVLCompress::compress_int( out, _fields[i].begin );
00116           out = RVLCompress::compress_int( out, _fields[i].end );
00117           out = RVLCompress::compress_longlong( out, _fields[i].number );
00118         }
00119 
00120         buffer.unwrite( length - (out - begin) );
00121       }
00122     };
00123   }
00124 }
00125 
00126 #endif // INDRI_TERMLISTBUILDER_HPP

Generated on Wed Nov 3 12:59:05 2004 for Lemur Toolkit by doxygen1.2.18