00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 #ifndef _INDEX_HPP 00014 #define _INDEX_HPP 00015 00017 00055 // C. Zhai 02/08/2001 00056 00057 #include "TermInfoList.hpp" 00058 #include "DocInfoList.hpp" 00059 #include "DocumentManager.hpp" 00060 #include "lemur-platform.h" 00061 #include "lemur-compat.hpp" 00062 00063 class Index { 00064 public: 00065 00066 virtual ~Index() {}; 00067 00069 00070 00072 virtual bool open(const string &indexName)=0; 00074 00076 00077 00079 virtual TERMID_T term (const TERM_T &word) const=0; 00080 00082 virtual const TERM_T term (TERMID_T termID) const=0; 00083 00085 virtual DOCID_T document (const EXDOCID_T &docIDStr) const=0; 00086 00088 virtual const EXDOCID_T document (DOCID_T docID) const=0; 00091 // virtual const char* docManager(int docID) { return NULL;} 00092 virtual const DocumentManager* docManager(DOCID_T docID) const {return NULL;} 00093 00094 00096 00099 virtual const string termLexiconID() const { return "";} 00100 00102 00104 00105 00107 virtual COUNT_T docCount () const=0; 00108 00110 virtual COUNT_T termCountUnique () const=0; 00111 00113 virtual COUNT_T termCount (TERMID_T termID) const=0; 00114 00116 virtual COUNT_T termCount () const=0; 00117 00119 virtual float docLengthAvg() const=0; 00120 00122 virtual COUNT_T docCount(TERMID_T termID) const=0; 00123 00125 virtual COUNT_T docLength (DOCID_T docID) const=0; 00126 00128 00130 00131 00132 virtual DocInfoList *docInfoList(TERMID_T termID) const=0; 00133 00135 virtual TermInfoList *termInfoList(DOCID_T docID) const=0; 00136 00138 00139 // returns TermInfoList is sequential representation (not bag of words) 00140 // return NULL list when sequence is not available. 00141 virtual TermInfoList *termInfoListSeq(DOCID_T docID) const { return NULL; } 00142 00143 }; 00144 00145 00146 #endif 00147 00148 00149