Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

TermInfoList.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 #ifndef _TERMINFOLIST_HPP
00014 #define _TERMINFOLIST_HPP
00015 
00016 #include "IndexTypes.hpp"
00017 #include "Exception.hpp"
00018 
00020 
00029 class TermInfo {
00030 public:
00031   TermInfo() {}
00032   TermInfo( TERMID_T termID, COUNT_T termCount) :
00033     tid(termID), tcount(termCount) {}
00034   virtual ~TermInfo() {}
00035 
00037   virtual TERMID_T termID() const {return tid;}
00038 
00040   virtual void termID(TERMID_T id) {tid = id;}
00041 
00043   virtual COUNT_T count() const {return tcount;}
00044 
00046   virtual void count(COUNT_T c) {tcount = c;}
00047 
00048   // Return list of positions this term occurs in this document
00049   // (can be a list of 1 item)
00050   // Default implementation to return NULL if no position information available for this TermInfo
00051   // List of positions is better used for bag of words support
00052   virtual const LOC_T* positions() const{ return NULL; }
00053 
00054   // Return position this term occurs in this document
00055   // Better for sequence of words support
00056   // When list of positions can be obtained, this returns the first item in the list
00057   // Default implementation to return -1 if no position information available for this TermInfo
00058   virtual LOC_T position() const { return -1; }
00059 
00060   virtual void position(LOC_T pos) {}
00061 
00062 protected:
00063   TERMID_T tid;
00064   COUNT_T tcount;
00065 };
00066 
00067 
00069 
00077 class TermInfoList {
00078 public:
00079   virtual ~TermInfoList() {}
00080 
00081 protected:
00082   // Helper functions for iterator, subclasses should override
00084   virtual TermInfo* newElement() const { return new TermInfo(); }
00086   virtual TermInfo* getElement(TermInfo* elem, POS_T position) const =0;
00089   virtual void assignElement(TermInfo* to, TermInfo* from) const { *to = *from; }
00091   virtual POS_T beginPosition() const =0;
00093   virtual POS_T endPosition() const =0;
00095   virtual POS_T nextPosition(POS_T position) const =0;
00096 
00097 public:
00098   // Single, internal iteration
00100   virtual void startIteration()const=0;
00102   virtual bool hasMore()const=0;
00104   virtual TermInfo *nextEntry()const=0;
00105 
00106   // C++ style forward input (readonly) iterator
00108   class iterator : std::iterator<std::input_iterator_tag, TermInfo> {
00109   public:
00110     iterator() : list(NULL), position(NULL), current(NULL) {}
00111     iterator(const iterator& other) {
00112       list = other.list;
00113       position = other.position;
00114       if ((list) && (other.current) ) {
00115         current = list->newElement();
00116         list->assignElement(current, other.current);  // list knows element class
00117       } else {
00118         current = NULL;
00119       }
00120     }
00121     iterator(const TermInfoList* til, POS_T pos) : list(til), position(pos) {
00122       if (list) {
00123         if (position != list->endPosition()) {
00124           current = list->newElement();   // get new element
00125           current = list->getElement(current, position);
00126         } else {
00127           current = NULL;
00128         }
00129       }
00130     }
00131 
00132     ~iterator() {
00133         delete(current);
00134     }
00135 
00136     TermInfo& operator*() { return *current; }
00137     TermInfo* operator->() { return current; }
00138     iterator& operator++() {
00139       position = list->nextPosition(position);
00140       if (position != list->endPosition())
00141         current = list->getElement(current, position);
00142       return *this;
00143     }
00144     // identical to prefix version
00145     iterator& operator++(int) {
00146       return operator++();
00147     }
00148     bool operator==(const iterator& other) const {
00149       return (list == other.list) && (position == other.position);
00150     }
00151     bool operator!=(const iterator& other) const {
00152       return (list != other.list) || (position != other.position);
00153     }
00154     iterator& operator=(const iterator& other) {
00155       list = other.list;
00156       position = other.position;
00157       if ((list) && (other.current)) {
00158         if (!current)
00159           current = list->newElement();
00160         list->assignElement(current, other.current);  // list knows element class
00161       } else {
00162         delete(current);
00163         current=NULL;
00164       }
00165       return *this;
00166     }
00169     void seek(POS_T pos) {
00170       position = pos;
00171       if (position != list->endPosition()) {
00172         if (!current)
00173           current = list->newElement();
00174         current = list->getElement(current, position);
00175       } else {
00176         delete(current);
00177         current = NULL;
00178       }
00179     }
00180 
00181   protected:
00182     const TermInfoList* list;  // list associated with this iterator
00183     POS_T position;     // current position in list
00184     TermInfo* current;   // current element of list
00185   }; // end of nested iterator declaration
00186  
00187   iterator& begin() const { 
00188     iterator it(this, beginPosition());
00189     itbegin = it;
00190     return itbegin;
00191   }
00192   iterator& end() const { 
00193     iterator it(this, endPosition());
00194     itend = it;
00195     return itend;
00196   }
00197 
00198 protected:
00199   mutable TermInfoList::iterator itbegin;  // iterator at head of list
00200   mutable TermInfoList::iterator itend;    // iterator at end of list
00201   friend class iterator;
00202 };
00203 
00204 
00205 #endif

Generated on Wed Nov 3 12:59:05 2004 for Lemur Toolkit by doxygen1.2.18