00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 #ifndef _INVTERMLIST_HPP 00013 #define _INVTERMLIST_HPP 00014 00015 #include "common_headers.hpp" 00016 #include "InvFPTerm.hpp" 00017 #include "InvFPTypes.hpp" 00018 00019 00027 class InvTermList : public TermInfoList { 00028 public: 00029 InvTermList(); 00030 ~InvTermList(); 00031 00033 void startIteration() const; 00034 00036 bool hasMore() const; 00037 00039 TermInfo *nextEntry() const; 00040 00042 COUNT_T docLength() const { return length; } 00043 00045 COUNT_T termCount() const{ return listlen/2; } 00046 00048 DOCID_T docID() const{ return uid; } 00049 00052 bool binRead(ifstream& infile); 00053 00054 protected: 00055 // Helper functions for iterator, subclasses should override 00057 virtual TermInfo* getElement(TermInfo* elem, POS_T position) const; 00059 virtual POS_T beginPosition() const { return (POS_T) list; } 00061 virtual POS_T endPosition() const { return (POS_T) end; } 00063 virtual POS_T nextPosition(POS_T position) const; 00064 00065 private: 00066 00067 DOCID_T uid; // this doc's id 00068 int length; // length of this document (terms + stopwords) 00069 // LOC_T==TERMID_T==COUNT_T==DOCID_T 00070 LOC_T* list; // beginning of list of terms and counts 00071 LOC_T* end; // end of list 00072 int listlen; // number of terms in list 00073 mutable LOC_T* iter; // index for iterator 00074 mutable TermInfo entry; 00075 }; 00076 00077 00078 00079 00080 #endif