Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

PassageRep.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2003-2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #ifndef _PASSAGEREP_HPP
00013 #define _PASSAGEREP_HPP
00014 
#include <algorithm>
#include <cstddef>
#include <vector>
#include "MatchInfo.hpp"
00017 
/// Score record for one scored passage.
struct PassageScore {
  /// Identifier the score belongs to.
  /// NOTE(review): presumably the document id -- confirm against callers.
  int id;
  /// Start position of the scored passage.
  int start;
  /// End position of the scored passage.
  int end;
  /// Score assigned to the passage.
  double score;
};

/// A vector of PassageScore records that can be ordered by score.
class PassageScoreVector : public std::vector<PassageScore> {
public:
  PassageScoreVector() : std::vector<PassageScore>() {
  }
  /// Sort the records in place so that the highest score comes first.
  /// Records with equal scores end up in an unspecified relative order.
  void sortScores() {
    // Use a temporary comparator rather than the static cmpFn member:
    // cmpFn is only declared here, so using it would require an
    // out-of-line definition to be linked in.
    std::sort(this->begin(), this->end(), PassageScoreDescending());
  }
private:
  /// Strict weak ordering: a precedes b when a has the larger score.
  class PassageScoreDescending {
  public:
    bool operator()(const PassageScore & a, const PassageScore & b) const {
      return a.score > b.score;
    }
  };
  /// No longer used by sortScores(); the declaration is retained so that an
  /// existing out-of-line definition of this member continues to compile.
  static PassageScoreDescending cmpFn;
};
00054 class PassageRep : public DocumentRep {
00055 public:
00061   PassageRep(DocumentRep &dRep, int d, int p, int o) :
00062     DocumentRep(dRep.getID(), p),
00063     docRep(dRep), docEnd(d), psgSize(p), overlap(o) {
00064     // update encapsulated rep.
00065     docRep.setDocLength(docLength);
00066   }
00067 
00068 #if 0
00069   PassageRep(): DocumentRep(0, 0), docRep(*this) { 
00070   }
00071 #endif
00072 
00073   #if 0
00074 
00075   void startPassageIteration() {
00076     start = 0;
00077     end = psgSize < docEnd ? psgSize : docEnd;
00078   }
00080   bool hasMorePassage() {
00081     return(start < docEnd);
00082   }
00084   void nextPassage() {
00085     int next = start + (end - overlap);
00086     if(next < docEnd)
00087       start = next;
00088     else
00089       start = docEnd;
00090     end = (start + psgSize) < docEnd ? (start + psgSize) : docEnd;
00091     docLength = end - start; // adjust for shorter last passage.
00092     // update encapsulated rep.
00093     docRep.setDocLength(docLength);
00094   }
00095 #endif
00096 
00097   class iterator {
00098   public:
00099     iterator() : start(0), end(0), psgSize(0), overlap(0), docEnd(0),
00100     rep(NULL) {};
00101     iterator(int s, int e, int p, int o, int d, PassageRep *r) : start(s), 
00102                                                                  end(e), 
00103                                                                  psgSize(p), 
00104                                                                  overlap(o), 
00105                                                                  docEnd(d) {
00106       rep = new PassageRep(*r);
00107     };
00108     // risk of double deletes when copying?
00109     virtual ~iterator() { 
00110       delete(rep);
00111     };
00113     virtual PassageRep &operator*(){ return *rep;};
00114 
00115     virtual iterator& operator++(){
00116       int next = start + (end - overlap);
00117       if(next < docEnd)
00118         start = next;
00119       else
00120         start = docEnd;
00121       end = (start + psgSize) < docEnd ? (start + psgSize) : docEnd;
00122       int docLength = end - start; // adjust for shorter last passage.
00123       // update encapsulated rep. // fix this!
00124       rep->setEnd(start, end, docLength);
00125       return *this;
00126     };
00127 
00128     virtual iterator& operator++(int){  // ++foo semantics here?
00129       int next = start + (end - overlap);
00130       if(next < docEnd)
00131         start = next;
00132       else
00133         start = docEnd;
00134       end = (start + psgSize) < docEnd ? (start + psgSize) : docEnd;
00135       int docLength = end - start; // adjust for shorter last passage.
00136       // update encapsulated rep. // fix this!
00137       rep->setEnd(start, end, docLength);
00138       return *this;
00139     }; 
00141     virtual bool operator==(iterator& other)
00142     {
00143       return (other.start == start && other.end == end);
00144     };
00146     virtual bool operator!=(iterator& other)
00147     {
00148       return !(other.start == start && other.end == end);
00149     };
00150   protected:
00152     PassageRep *rep;
00154     int psgSize;
00156     int overlap;
00158     int docEnd;
00160     int start;
00162     int end;
00163   };
00164   
00165   // could trim start, end attributes.
00166   PassageRep::iterator begin() {
00167     start = 0;
00168     pEnd = psgSize < docEnd ? psgSize : docEnd;
00169     // need to keep the state in the iterator only.
00170     PassageRep::iterator retval(0, pEnd, psgSize, overlap, docEnd, this);
00171     return retval;
00172   }
00173 
00174   PassageRep::iterator end() {
00175     PassageRep::iterator retval (docEnd, docEnd, psgSize, overlap, docEnd, 
00176                                  this);
00177     return retval;
00178   }
00180   void setEnd(int s, int e, int dl) {
00181     start = s;
00182     pEnd = e;
00183     docRep.setDocLength(dl);
00184   }
00191   int passageTF(TERMID_T tid, MatchInfo *matches) const {
00192     int tf = 0;
00193     int pos = 0;
00194     MatchInfo::iterator m = matches->begin();
00195     while (m != matches->end() && pos < pEnd) {
00196       TMatch match = *m;
00197       pos = match.position;
00198       // adjust for stopwords not counted. Bleah
00199       if (pos > docEnd) docEnd = pos + 1;
00200       if (match.tid == tid) {
00201         if (pos >= start && pos < pEnd) {
00202           tf++;
00203         }
00204       }
00205       m++;
00206     }
00207     return tf;
00208   }
00210   int getStart () const {return start;}
00212   int getEnd () const {return pEnd;}
00213 
00215   virtual double termWeight(TERMID_T termID, const DocInfo *info) const {
00216     return docRep.termWeight(termID, info);
00217   }
00218   
00220   virtual double scoreConstant()  const {
00221     return docRep.scoreConstant();
00222   }
00223   
00224 
00225 protected:
00227   DocumentRep &docRep;
00229   int psgSize;
00231   int overlap;
00233   mutable int docEnd;
00235   mutable int start;
00237   mutable int pEnd;
00238 };
00239 
00240 #endif /* _PASSAGEREP_HPP */
00241 
00242 
00243 

Generated on Wed Nov 3 12:59:01 2004 for Lemur Toolkit by doxygen1.2.18