00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef INDRI_TERMLISTBUILDER_HPP
00020 #define INDRI_TERMLISTBUILDER_HPP
00021
00022 #include "indri/FieldExtent.hpp"
00023 #include "indri/Buffer.hpp"
00024 #include "RVLCompress.hpp"
00025 #include "indri/RVLDecompressStream.hpp"
00026 #include "indri/FieldExtent.hpp"
00027 #include "indri/greedy_vector"
00028
00029 namespace indri {
00030 namespace index {
00031 class TermListBuilder {
00032 private:
00033 greedy_vector<int> _terms;
00034 greedy_vector<FieldExtent> _fields;
00035
00036 public:
00037 void clear() {
00038 _terms.clear();
00039 _fields.clear();
00040 }
00041
00042 void addField( const indri::index::FieldExtent& field ) {
00043 _fields.push_back( field );
00044 }
00045
00046 void addTerm( const int termID ) {
00047 _terms.push_back( termID );
00048 }
00049
00050 greedy_vector<int>& terms() {
00051 return _terms;
00052 }
00053
00054 const greedy_vector<int>& terms() const {
00055 return _terms;
00056 }
00057
00058 greedy_vector<indri::index::FieldExtent>& fields() {
00059 return _fields;
00060 }
00061
00062 const greedy_vector<indri::index::FieldExtent>& fields() const {
00063 return _fields;
00064 }
00065
00066 void read( const char* buffer, int size ) {
00067 clear();
00068 RVLDecompressStream stream( buffer, size );
00069
00070 int termCount;
00071 int fieldCount;
00072
00073 stream >> termCount
00074 >> fieldCount;
00075
00076 for( int i=0; i<termCount; i++ ) {
00077 int termID;
00078 stream >> termID;
00079 _terms.push_back( termID );
00080 }
00081
00082 for( int i=0; i<fieldCount; i++ ) {
00083 FieldExtent extent;
00084
00085 stream >> extent.id
00086 >> extent.begin
00087 >> extent.end
00088 >> extent.number;
00089 }
00090 }
00091
00092 void write( Buffer& buffer ) {
00093
00094
00095
00096
00097
00098
00099 buffer.clear();
00100 int length = 10 + 5 * _terms.size() + 2 * sizeof(FieldExtent) * _fields.size();
00101 char* begin = buffer.write( length );
00102 char* out = begin;
00103
00104
00105 out = RVLCompress::compress_int( out, _terms.size() );
00106 out = RVLCompress::compress_int( out, _fields.size() );
00107
00108
00109 int termsSize = RVLCompress::compress_ints( &_terms.front(), (unsigned char*) out, _terms.size() );
00110 out += termsSize;
00111
00112
00113 for( unsigned int i=0; i<_fields.size(); i++ ) {
00114 out = RVLCompress::compress_int( out, _fields[i].id );
00115 out = RVLCompress::compress_int( out, _fields[i].begin );
00116 out = RVLCompress::compress_int( out, _fields[i].end );
00117 out = RVLCompress::compress_longlong( out, _fields[i].number );
00118 }
00119
00120 buffer.unwrite( length - (out - begin) );
00121 }
00122 };
00123 }
00124 }
00125
00126 #endif // INDRI_TERMLISTBUILDER_HPP