00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef _INVINDEXMERGE_HPP
00014 #define _INVINDEXMERGE_HPP
00015
00016 #include "common_headers.hpp"
00017 #include "InvDocList.hpp"
00018 #include "InvFPTypes.hpp"
00019 #include "Exception.hpp"
00020
/// Numeric constant 2000000. It is not referenced anywhere in this header.
/// NOTE(review): presumably a per-file read buffer size in bytes used by the
/// implementation file -- confirm against the .cpp.
00021 #define READBUFSIZE 2000000
/// Numeric constant 32. It is not referenced anywhere in this header.
/// NOTE(review): looks like a cap on the number of file handles kept open at
/// once during a merge pass -- confirm against the .cpp.
00022 #define NUM_FH_OPEN 32
00023
/// Pairs an inverted document list with an input file stream.
/// Both members are raw pointers; this struct declares no constructor or
/// destructor, so it performs no allocation or cleanup of its own.
/// NOTE(review): presumably `list` holds the entry most recently read from
/// `reader` while several files are merged -- confirm against the .cpp.
00024 struct IndexReader {
/// Document list associated with this reader.
00025 InvDocList* list;
/// Input stream associated with this reader.
00026 ifstream* reader;
00027 };
00028
00029
/// Declares the interface for merging inverted index files.
/// Only declarations are visible here; all behaviour lives in the
/// implementation file. The merge steps (mergeFiles, finalMerge, writeInvFIDs,
/// least) are virtual, so subclasses can override them.
/// NOTE(review): the class holds a raw `char*` buffer and declares no copy
/// constructor or assignment operator -- confirm who owns `readbuffer` before
/// copying instances.
00030 class InvIndexMerge {
00031 public:
/// Construct with a caller-supplied buffer.
/// @param buffer      memory block supplied by the caller
/// @param size        size associated with `buffer` (presumably bytes -- TODO confirm)
/// @param maxfilesize file size limit; defaults to 2100000000, which stays
///                    below 2^31-1 and therefore fits a 32-bit signed long
00036 InvIndexMerge(char* buffer, long size, long maxfilesize=2100000000);
/// Construct from a buffer size instead of an existing buffer.
/// @param buffersize  requested buffer size; defaults to 64000000
///                    (presumably allocated internally -- TODO confirm)
/// @param maxfilesize file size limit; defaults to 2100000000
00037 InvIndexMerge(long buffersize=64000000, long maxfilesize=2100000000);
/// Virtual destructor, so instances can be destroyed through a base pointer.
00038 virtual ~InvIndexMerge();
00039
/// Merge entry point.
/// @param tf     list of file names (presumably the temporary files to merge)
/// @param prefix string prefix (presumably used to name the output -- TODO confirm)
/// @return int; meaning is defined by the implementation (not visible here)
00042 int merge(vector<string>* tf, const string &prefix);
00043
/// Set the file size limit (presumably stored in `maxfile` -- TODO confirm).
00044 void setMaxFileSize(long size);
/// Supply a buffer and its size.
/// NOTE(review): the returned `char*` is presumably the previously held
/// buffer, handed back to the caller -- confirm against the .cpp.
00045 char* setBuffer(char* buffer, long size);
00046
/// Merge step taking a list of files and a level number.
/// NOTE(review): the name suggests a hierarchical, multi-pass merge driven by
/// `level` -- confirm recursion and return semantics in the .cpp.
00050 int hierMerge(vector<string>* files, int level);
00051
/// Overridable merge of `files`; `intmed` is a second list of file names
/// (presumably receives the names of intermediate output files -- TODO confirm).
00053 virtual int mergeFiles(vector<string>* files, vector<string>* intmed, int level);
00054
/// Overridable final merge step over `files`.
00056 virtual int finalMerge(vector<string>* files);
00057
00058 protected:
/// Overridable writer; takes no arguments and returns nothing.
/// NOTE(review): presumably writes the inverted-file id table built from
/// `invfiles` -- confirm.
00060 virtual void writeInvFIDs();
/// Overridable selection helper over a set of readers; results are returned
/// through `ret`.
/// NOTE(review): presumably collects the readers holding the smallest term
/// id; whether `ret` holds term ids or reader indices is not visible here.
00062 virtual void least(vector<IndexReader*>* r, vector<TERMID_T>* ret);
/// Helper taking a stream, a buffer pointer and a byte count.
/// NOTE(review): presumably installs `bp` as the stream buffer of `fs` --
/// confirm.
00064 void setbuf(ifstream* fs, char* bp, int bytes);
00065
/// String member (presumably the output name/prefix given to merge()).
00066 string name;
/// List of file names (presumably the inverted files produced so far).
00067 vector<string> invfiles;
/// Long member (presumably the maximum output file size).
00068 long maxfile;
/// Long member (presumably the size of `readbuffer`).
00069 long bufsize;
/// Raw buffer pointer; ownership is not determinable from this header.
00070 char* readbuffer;
00071 };
00072
00073 #endif