00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 /* 00013 10/18/2002 -- dmf Add binReadC, binWriteC, deltaDecode, and deltEncode 00014 for compression of TermInfoLists. 00015 */ 00016 00017 #ifndef _INVFPTERMLIST_HPP 00018 #define _INVFPTERMLIST_HPP 00019 00020 #include "common_headers.hpp" 00021 #include "InvFPTerm.hpp" 00022 #include "InvFPTypes.hpp" 00023 #include "RVLCompress.hpp" 00024 00032 class File; 00033 00034 class InvFPTermList : public TermInfoList { 00035 public: 00036 InvFPTermList(); 00037 InvFPTermList(DOCID_T did, int len, vector<LocatedTerm> &tls); 00038 ~InvFPTermList(); 00039 00041 void startIteration() const; 00042 00044 bool hasMore() const; 00045 00047 TermInfo *nextEntry() const; 00048 00050 COUNT_T docLength() const{ return length; } 00051 00053 COUNT_T termCount() const{ return listlen; } 00054 00056 DOCID_T docID() const{ return uid; } 00057 00060 bool binRead(ifstream& infile); 00062 bool binReadC(ifstream& infile); 00064 void binWriteC(ofstream& ofile); 00065 00066 bool binReadC( File& infile ); 00067 void binWriteC( File& outfile ); 00068 00071 virtual void deltaDecode(); 00074 virtual void deltaEncode(); 00075 00077 void countTerms(); 00078 00079 protected: 00080 // Helper functions for iterator, subclasses should override 00082 virtual TermInfo* newElement() const { return new InvFPTerm(); } 00084 virtual TermInfo* getElement(TermInfo* elem, POS_T position) const; 00086 virtual void assignElement(TermInfo* to, TermInfo* from) const { 00087 *static_cast<InvFPTerm*>(to) = *static_cast<InvFPTerm*>(from); 00088 } 00090 virtual POS_T beginPosition() const { return (POS_T) 0; } 00092 virtual POS_T endPosition() const { return (POS_T) listlen; } 00094 virtual POS_T nextPosition(POS_T position) const; 00095 00096 DOCID_T uid; // this doc's id 00097 COUNT_T length; // length of this document (terms + stopwords) 00098 LocatedTerm* list; // list of terms and locations 00099 LLTerm* listcounted; // list of terms and location lists 00100 COUNT_T listlen; // number of items we have in list (same as number of terms) 00101 mutable int index; // index for iterator 00102 LOC_T* counts; // keep track of counts of terms for bag of word 00103 mutable InvFPTerm entry; 00104 mutable vector<LOC_T> loclist; //list of locations to return 00105 00106 }; 00107 00108 00109 00110 00111 #endif