00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef _INVDOCLIST_HPP
00014 #define _INVDOCLIST_HPP
00015
00016
00017
00018
00019
00020
00021 #include <cmath>
00022 #include "InvFPTypes.hpp"
00023 #include "common_headers.hpp"
00024 #include "DocInfoList.hpp"
00025 #include "MemCache.hpp"
00026 #include "RVLCompress.hpp"
00027
00028 extern "C" {
00029 #include <cstdio>
00030 }
00031
// NOTE(review): the meaning of this constant is not visible in this header;
// presumably it is a default sizing parameter used by the InvDocList
// implementation — confirm against the .cpp file. Being a macro with a very
// generic name, it is visible to every file that includes this header.
00032 #define DEFAULT 9
00033
/// A DocInfoList implementation backed by a raw buffer of LOC_T values.
/// The buffer is delimited by the `begin` and `end` pointers; the object also
/// records a term id (`uid`), a term length (`strlength`) and a document
/// frequency (`df`). Only the inline accessors are defined in this header;
/// everything else is implemented elsewhere.
/// NOTE(review): a destructor is declared but no copy constructor or copy
/// assignment operator is declared in this class, although it holds raw
/// pointers — confirm that copying instances is safe or never done.
00034 class InvDocList: public DocInfoList {
00035 public:
/// Default constructor (defined elsewhere).
00036 InvDocList();
00037
/// Construct for term `id`. NOTE(review): `len` is presumably the term's
/// string length (cf. termLen()) — confirm against the implementation.
00040 InvDocList(TERMID_T id, int len);
/// As above, additionally taking a MemCache; presumably the cache supplies
/// the list's memory — confirm.
00042 InvDocList(MemCache* mc, TERMID_T id, int len);
/// As above, additionally taking a document id and a location; presumably
/// these seed the list with a first entry — confirm.
00043 InvDocList(MemCache* mc, TERMID_T id, int len, DOCID_T docid, LOC_T location);
/// Construct from an existing array; the parameters mirror those of setList()
/// but none of them is defaulted here.
00045 InvDocList(TERMID_T id, int listlen, LOC_T* list, int fr, DOCID_T* ldocid, int len);
/// Destructor (defined elsewhere).
00046 ~InvDocList();
00047
/// Set the list contents from an existing array. `ldocid` defaults to NULL
/// and `len` to 0. NOTE(review): presumably `list`/`listlen` are the data and
/// its element count and `fr` the document frequency; whether `list` is
/// copied or adopted is not visible here — confirm.
00053 void setList(TERMID_T id, int listlen, LOC_T* list, int fr, DOCID_T* ldocid=NULL, int len=0);
00054
/// Same parameters as setList() without defaults. NOTE(review): the name
/// suggests a variant that copies the input rather than adopting the caller's
/// pointer — confirm.
00058 void setListSafe(TERMID_T id, int listlen, LOC_T* list, int fr, DOCID_T* ldocid, int len);
00059
/// Reset the list (defined elsewhere). NOTE(review): whether memory is
/// released here is not visible in this header; compare resetFree().
00063 void reset();
00064
/// Reset the list; the name suggests that memory is also freed — confirm.
00067 void resetFree();
00068
/// Allocate memory for the list; the bool result presumably reports
/// success — confirm.
00069 bool allocMem();
/// Presumably reports whether the list currently has no memory — confirm.
00070 bool hasNoMem();
00071
/// Record an occurrence of this term in document `docid`; the bool result
/// presumably reports success — confirm.
00073 virtual bool addTerm(DOCID_T docid);
00074
/// Append the list `tail` to this list; the bool result presumably reports
/// success — confirm.
00076 virtual bool append(InvDocList* tail);
00077
/// Iteration interface. All four are const; the cursor state they need lives
/// in the mutable members `iter` and `entry`.
/// NOTE(review): presumably these override DocInfoList's iteration API —
/// confirm against DocInfoList.hpp.
00078 virtual void startIteration() const;
00079 virtual bool hasMore() const;
00080 virtual DocInfo* nextEntry() const;
00081 virtual void nextEntry(DocInfo* info) const;
00082
/// Value pointed to by `lastid`, or -1 when `lastid` is NULL.
00083 DOCID_T curDocID() const{ if (lastid == NULL) return -1; return *lastid; };
/// Returns `df`.
00084 COUNT_T docFreq() const{ return df; };
/// Number of LOC_T elements between `begin` and `end` (pointer difference).
00085 int length() const{ return end-begin; };
/// Returns `uid`.
00086 TERMID_T termID() const{ return uid; };
/// Returns `strlength`.
00087 int termLen() const{ return strlength; };
/// Collection term frequency (defined elsewhere).
00088 virtual COUNT_T termCTF() const;
/// Offset of `lastid` from `begin`, in LOC_T elements. Unlike curDocID(),
/// there is no NULL check on `lastid`.
00089 int curDocIDdiff() const{ return lastid-begin; };
/// Value stored in the element immediately after `lastid`. Unlike
/// curDocID(), there is no NULL check, so `lastid` must be valid.
00090 int curDocIDtf() const{ return *(lastid+1); };
/// Returns `size`. NOTE(review): the unit of `size` (bytes vs. elements) is
/// not visible in this header — confirm.
00091 int memorySize() const{ return size; };
00092
/// Write this list to `of` (defined elsewhere).
00094 void binWrite(ofstream& of);
00095
/// Read this list from `inf`; the bool result presumably reports
/// success — confirm.
00097 bool binRead(ifstream& inf);
00098
/// Variant of binWrite(). NOTE(review): the "C" suffix, together with the
/// RVLCompress.hpp include, suggests compressed output — confirm.
00100 void binWriteC(ofstream& of);
00101
/// Variant of binRead(); presumably reads the format written by
/// binWriteC() — confirm.
00103 bool binReadC(ifstream& inf);
00104
00105 protected:
00106
00108
00110
00112
00114
00115
/// Presumably grows the buffer when it is full; the bool result presumably
/// reports success — confirm.
00119 bool getMoreMem();
/// NOTE(review): the name suggests an integer base-2 logarithm of `num` —
/// confirm against the implementation.
00120 int logb2(int num);
00121
/// Delta-encode the list contents (defined elsewhere); virtual so subclasses
/// can change the encoding.
00124 virtual void deltaEncode();
00125
/// Inverse of deltaEncode() (defined elsewhere).
00128 virtual void deltaDecode();
00129
00130
/// Start of the LOC_T buffer (see length()).
00131 LOC_T* begin;
/// Points at the element read by curDocID(); the element after it is read by
/// curDocIDtf(). May be NULL (curDocID() checks for that).
00132 LOC_T* lastid;
/// NOTE(review): presumably points at the frequency slot of the current
/// entry — confirm.
00133 LOC_T* freq;
/// End of the portion of the buffer counted by length().
00134 LOC_T * end;
/// Iteration cursor; mutable so the const iteration methods can advance it.
00135 mutable LOC_T* iter;
/// Value returned by memorySize().
00136 int size;
/// NOTE(review): presumably caches sizeof(LOC_T) — confirm.
00137 int LOC_Tsize;
/// Value returned by termLen().
00138 int strlength;
/// Term id returned by termID().
00139 TERMID_T uid;
/// Document frequency returned by docFreq().
00140 COUNT_T df;
/// Optional memory cache passed to the MemCache constructors.
00141 MemCache* cache;
/// NOTE(review): presumably true when `cache` is in use — confirm.
00142 bool hascache;
00143
/// NOTE(review): presumably marks lists that must not be modified — confirm
/// which methods set and check this flag.
00144 bool READ_ONLY;
00145 private:
/// Scratch DocInfo; mutable, presumably so the const nextEntry() overload
/// that returns a pointer can fill and return it — confirm.
00146 mutable DocInfo entry;
00147 };
00148
00149 #endif