Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

CompressedCollection.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2003-2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 //
00014 // CompressedCollection.hpp
00015 //
00016 // 12 May 2004 -- tds
00017 //
00018 
00019 #ifndef INDRI_COMPRESSEDCOLLECTION_HPP
00020 #define INDRI_COMPRESSEDCOLLECTION_HPP
00021 
00022 #include "indri/Collection.hpp"
00023 #include "string-set.h"
00024 #include <string>
00025 #include "File.hpp"
00026 #include "Keyfile.hpp"
00027 #include "WriteBuffer.hpp"
00028 #include "indri/Buffer.hpp"
00029 #include "indri/HashTable.hpp"
00030 
00031 class CompressedCollection : public Collection {  // Collection subclass; the z_stream_s member suggests zlib-compressed storage -- TODO confirm in the .cpp
00032 private:
00033   Keyfile _lookup;  // presumably the documentID -> storage-location index -- TODO confirm
00034   File _storage;  // presumably the file holding the stored document data -- TODO confirm
00035   WriteBuffer* _output;  // raw pointer; ownership is not visible from this header
00036   Buffer _positionsBuffer;  // presumably scratch space for position data -- TODO confirm
00037   struct z_stream_s* _stream;  // elaborated type specifier for zlib's stream struct; no zlib header is included directly in this file
00038   HashTable<const char*, Keyfile*> _metalookups;  // keyed by C string; presumably one Keyfile per metadata field -- TODO confirm
00039   String_set* _strings;  // raw pointer; NOTE(review): may back the const char* keys of _metalookups -- confirm
00040   // Private write helpers: keyLength/valueLength are non-const int references, presumably out-parameters -- TODO confirm
00041   void _writePositions( ParsedDocument* document, int& keyLength, int& valueLength );
00042   void _writeMetadataItem( ParsedDocument* document, int i, int& keyLength, int& valueLength );  // i: presumably the index of the metadata item within document -- TODO confirm
00043   void _writeText( ParsedDocument* document, int& keyLength, int& valueLength );
00044   // Private read helper: takes a raw byte range (pointer + length) and a document to operate on
00045   void _readPositions( ParsedDocument* document, const void* positionData, int positionDataLength );
00046 
00047 public:
00048   CompressedCollection();  // NOTE(review): raw-pointer members plus a declared destructor, but no copy ctor/assignment declared -- confirm instances are never copied
00049   ~CompressedCollection();  // NOTE(review): not marked virtual here; whether Collection's destructor is virtual is not visible from this file
00050   // Lifecycle: create/open/openRead each take a file name; openRead is presumably a read-only open -- TODO confirm
00051   void create( const std::string& fileName );
00052   void create( const std::string& fileName, const std::vector<std::string>& indexedFields );  // NOTE(review): uses std::vector but <vector> is not included directly above; presumably arrives transitively
00053   void open( const std::string& fileName );
00054   void openRead( const std::string& fileName );
00055   void close();
00056   // Document access, addressed by integer documentID
00057   ParsedDocument* retrieve( int documentID );  // returns a raw pointer; NOTE(review): ownership of the result is not visible from this header
00058   std::string retrieveMetadatum( int documentID, const std::string& attributeName );  // returns the value by copy
00059   void addDocument( int documentID, ParsedDocument* document );
00060 };
00061 
00062 #endif // INDRI_COMPRESSEDCOLLECTION_HPP

Generated on Wed Nov 3 12:58:52 2004 for Lemur Toolkit by doxygen 1.2.18