Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

InvIndexMerge.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 #ifndef _INVINDEXMERGE_HPP
00014 #define _INVINDEXMERGE_HPP
00015 
00016 #include "common_headers.hpp"
00017 #include "InvDocList.hpp"
00018 #include "InvFPTypes.hpp"
00019 #include "Exception.hpp"
00020 
00021 #define READBUFSIZE 2000000  // presumably a read-buffer size in bytes; not referenced in this header - TODO confirm use in the .cpp
00022 #define NUM_FH_OPEN 32  // presumably the cap on simultaneously open file handles during a merge - TODO confirm
00023 
00024 struct IndexReader {
        // Pairs an input file stream with an InvDocList. The only use visible in
        // this header is as the element type of the vector passed to
        // InvIndexMerge::least().
        // NOTE(review): both members are raw pointers and no constructor or
        // destructor is declared, so whoever creates an IndexReader must also
        // manage what they point at - confirm ownership in the implementation.
00025   InvDocList* list;   // presumably the doc list most recently read from `reader` - TODO confirm
00026   ifstream* reader;   // input stream this reader pulls from
00027 };
00028 
00029 // this class could actually be static
      //
      // Merge driver for inverted-index files (purpose inferred from the names
      // in this header; the implementation is not part of this listing, so the
      // notes below describe signatures and flag what needs confirming).
      // NOTE(review): vector, string and ifstream are used unqualified, so one of
      // the included headers must bring the std names into scope - confirm.
      // NOTE(review): the class holds a raw char* (readbuffer) and declares a
      // destructor but no copy constructor/assignment, so copies would share
      // the same pointer - confirm instances are never copied.
00030 class InvIndexMerge {
00031 public:
        // Construct with a caller-supplied buffer of `size` bytes; maxfilesize
        // defaults to 2100000000.
        // NOTE(review): who frees `buffer` is not visible here - confirm.
00036   InvIndexMerge(char* buffer, long size, long maxfilesize=2100000000);
        // Construct from sizes only: buffersize defaults to 64000000 and
        // maxfilesize to 2100000000, so this overload is also the default
        // constructor.
        // NOTE(review): a two-argument call whose first argument is a literal 0
        // matches both constructors equally (null pointer vs. long) and will
        // not compile; callers must pass a typed value.
00037   InvIndexMerge(long buffersize=64000000, long maxfilesize=2100000000);
00038   virtual ~InvIndexMerge();
00039 
        // Top-level merge entry point. Takes a pointer to a list of strings and
        // a prefix string; returns an int whose meaning is not visible here.
        // presumably `tf` lists the temporary index files to merge and `prefix`
        // names the output - TODO confirm.
00042   int merge(vector<string>* tf, const string &prefix);
00043 
        // presumably stores `size` as the per-file limit (see `maxfile`) - TODO confirm
00044   void setMaxFileSize(long size);
        // Supplies a buffer and its size and returns a char*.
        // presumably the returned pointer is the previously installed buffer -
        // TODO confirm against the implementation.
00045   char* setBuffer(char* buffer, long size);
00046 
        // presumably merges `files` in successive passes, `level` identifying
        // the current pass - TODO confirm.
00050   int hierMerge(vector<string>* files, int level);
00051 
        // Virtual so subclasses can replace the merge step.
        // presumably merges `files` and records the resulting intermediate file
        // names in `intmed` - TODO confirm.
00053   virtual int mergeFiles(vector<string>* files, vector<string>* intmed, int level);
00054 
        // Virtual so subclasses can replace the final pass over `files`.
00056   virtual int finalMerge(vector<string>* files);
00057 
00058 protected:
        // presumably writes out the list of inverted-file names/ids held in
        // `invfiles` - TODO confirm.
00060   virtual void writeInvFIDs();
        // Takes the set of open readers and a vector of TERMID_T.
        // presumably `ret` is an output that identifies the reader(s) holding
        // the smallest term id - TODO confirm what is stored in it.
00062   virtual void least(vector<IndexReader*>* r, vector<TERMID_T>* ret);
        // presumably makes `fs` use the `bytes`-long region at `bp` as its
        // stream buffer - TODO confirm.
00064   void setbuf(ifstream* fs, char* bp, int bytes);
00065 
00066   string name;   // presumably the output name/prefix passed to merge() - TODO confirm
00067   vector<string> invfiles; // list of files that we've written to
00068   long maxfile; // maximum file size for each index
00069   long bufsize;   // presumably the size in bytes of `readbuffer` - TODO confirm
00070   char* readbuffer;   // raw buffer pointer; ownership not visible in this header
00071 };
00072 
00073 #endif

Generated on Wed Nov 3 12:58:58 2004 for Lemur Toolkit by doxygen 1.2.18