00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.cs.cmu.edu/~lemur/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 #ifndef _BASICINDEXWITHCAT_HPP 00014 #define _BASICINDEXWITHCAT_HPP 00015 00016 #include "IndexWithCat.hpp" 00017 00019 00027 class BasicIndexWithCat : public IndexWithCat { 00028 public: 00030 BasicIndexWithCat(Index &wordIndex, Index &categoryIndex, bool catIsTerm= true); 00031 00032 00033 ~BasicIndexWithCat() { delete [] base2cat; delete [] cat2base ;}; 00034 00036 bool open(const string &name) {return false;} 00037 00039 00040 00042 virtual TERMID_T term (const string &word) const { return baseIndex->term(word);} 00043 00045 virtual const string term (TERMID_T termID) const { return baseIndex->term(termID);} 00046 00048 virtual DOCID_T document (const string &docIDStr) const { return baseIndex->document(docIDStr);} 00049 00051 virtual const string document (DOCID_T docID) const { return baseIndex->document(docID);} 00052 00054 virtual int category (const string &catIDStr) const; 00055 00057 virtual const string category (int catID) const; 00058 00060 00062 00063 00065 virtual COUNT_T docCount () const { return baseIndex->docCount();} 00066 00068 virtual COUNT_T termCountUnique () const { return baseIndex->termCountUnique();} 00069 00071 virtual COUNT_T termCount (TERMID_T termID) const { return baseIndex->termCount(termID); } 00072 00074 virtual COUNT_T termCount () const { return baseIndex->termCount(); } 00075 00077 virtual float docLengthAvg() const { return baseIndex->docLengthAvg(); } 00078 00080 virtual COUNT_T docCount(TERMID_T termID) const { return baseIndex->docCount(termID);} 00081 00083 virtual COUNT_T docLength (DOCID_T docID) const {return baseIndex->docLength(docID); } 00084 00086 virtual int catCount() const; 00087 00089 virtual int docCountInCat(int catID) const; 00090 00092 00094 00095 00096 virtual DocInfoList *docInfoList(TERMID_T termID) const { return baseIndex->docInfoList(termID); } 00097 00099 virtual TermInfoList *termInfoList(DOCID_T docID) const { return baseIndex->termInfoList(docID); } 00100 00102 00104 00105 00106 00107 virtual void startCatIDIteration(int docID) const; 00108 virtual bool hasMoreCatID() const; 00110 00114 virtual int nextCatID() const; 00115 00116 00117 virtual void startDocIDIteration(int catID) const; 00118 virtual bool hasMoreDocID() const; 00119 00121 00125 virtual int nextDocID() const; 00126 00128 00129 00130 00131 private: 00133 Index *baseIndex; 00135 Index *catIndex; 00136 bool catAsTerm; 00137 00139 mutable TermInfoList *tmInfoBuffer; 00140 mutable DocInfoList *docInfoBuffer; 00141 mutable TermInfo *tmInfo; 00142 mutable DocInfo *docInfo; 00143 00145 int *base2cat; 00146 int *cat2base; 00147 00148 static const string OOVSTRING; 00149 }; 00150 00151 inline int BasicIndexWithCat::category (const string &catIDStr) const 00152 { 00153 if (catAsTerm) { 00154 return (catIndex->term(catIDStr)); 00155 } else { 00156 // docid as term, so cat as doc 00157 return (catIndex->document(catIDStr)); 00158 } 00159 } 00160 00161 00162 inline const string BasicIndexWithCat::category (int catID) const 00163 { 00164 if (catAsTerm) { 00165 return (catIndex->term(catID)); 00166 } else { 00167 // docid as term, so cat as doc 00168 return (catIndex->document(catID)); 00169 } 00170 } 00171 00172 00173 00174 inline int BasicIndexWithCat::catCount() const 00175 { 00176 if (catAsTerm) { 00177 return (catIndex->termCountUnique()-1); // -1 because of [OOV] 00178 } else { 00179 // docid as term, so cat as doc 00180 return (catIndex->docCount()); 00181 } 00182 } 00183 00184 inline int BasicIndexWithCat::docCountInCat(int catID) const 00185 { 00186 if (catAsTerm) { 00187 return (catIndex->docCount(catID)); 00188 } else { 00189 // docid as term, so cat as doc 00190 return (catIndex->docLength(catID)); 00191 } 00192 } 00193 00194 00195 00196 #endif /* _BASICINDEXWITHLABEL_HPP */