00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef INDRI_COMPRESSEDCOLLECTION_HPP
00020 #define INDRI_COMPRESSEDCOLLECTION_HPP
00021
00022 #include "indri/Collection.hpp"
00023 #include "string-set.h"
00024 #include <string>
00025 #include "File.hpp"
00026 #include "Keyfile.hpp"
00027 #include "WriteBuffer.hpp"
00028 #include "indri/Buffer.hpp"
00029 #include "indri/HashTable.hpp"
00030
00031 class CompressedCollection : public Collection { // Collection subclass; member types suggest compressed on-disk document storage -- TODO confirm against the .cpp
00032 private:
00033 Keyfile _lookup; // presumably the primary key -> record index into _storage -- TODO confirm
00034 File _storage; // backing file object; what gets written to it is defined in the implementation, not here
00035 WriteBuffer* _output; // raw pointer; ownership and lifetime are not visible in this header
00036 Buffer _positionsBuffer; // presumably scratch space for _writePositions/_readPositions -- TODO confirm
00037 struct z_stream_s* _stream; // declared via an elaborated type specifier, so the struct definition need not be visible here; looks like zlib's stream state -- confirm
00038 HashTable<const char*, Keyfile*> _metalookups; // C-string key -> Keyfile*; presumably one lookup per indexed metadata field -- confirm
00039 String_set* _strings; // raw pointer (type from string-set.h); NOTE(review): may hold the const char* keys used by _metalookups -- confirm
00040
00041 void _writePositions( ParsedDocument* document, int& keyLength, int& valueLength ); // private write helper; keyLength/valueLength are non-const refs, presumably outputs -- confirm
00042 void _writeMetadataItem( ParsedDocument* document, int i, int& keyLength, int& valueLength ); // same shape as above; i presumably indexes the document's metadata entries -- confirm
00043 void _writeText( ParsedDocument* document, int& keyLength, int& valueLength ); // same shape as above, for the document text (per the name) -- confirm
00044
00045 void _readPositions( ParsedDocument* document, const void* positionData, int positionDataLength ); // takes a read-only byte range (pointer + length) plus a document; presumably the inverse of _writePositions -- confirm
00046
00047 public:
00048 CompressedCollection(); // default constructor; initial values of the raw pointer members are set in the .cpp (not visible here)
00049 ~CompressedCollection(); // NOTE(review): class holds raw pointers (_output, _stream, _strings) and declares a destructor but no copy constructor/assignment -- confirm copying is prevented or safe
00050
00051 void create( const std::string& fileName ); // presumably creates a new collection at fileName -- confirm
00052 void create( const std::string& fileName, const std::vector<std::string>& indexedFields ); // overload taking a list of field names; NOTE(review): std::vector is used but <vector> is not included directly by this header -- presumably transitive, confirm
00053 void open( const std::string& fileName ); // presumably opens an existing collection -- confirm
00054 void openRead( const std::string& fileName ); // presumably a read-only variant of open() -- confirm
00055 void close(); // no arguments, no return value; what is flushed/released is defined in the .cpp
00056
00057 ParsedDocument* retrieve( int documentID ); // returns a raw ParsedDocument*; ownership of the result is not stated here -- confirm who frees it
00058 std::string retrieveMetadatum( int documentID, const std::string& attributeName ); // returns the value by copy; behaviour for an unknown attributeName is not visible here
00059 void addDocument( int documentID, ParsedDocument* document ); // takes the document by raw pointer; presumably stores it under documentID -- confirm
00060 };
00061
00062 #endif // INDRI_COMPRESSEDCOLLECTION_HPP