00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _PASSAGEREP_HPP
00013 #define _PASSAGEREP_HPP
00014
#include <algorithm>
#include <vector>
#include "MatchInfo.hpp"
00017
/// One scored passage: an identifier, the passage bounds and the score.
struct PassageScore {
  /// Identifier the score is attached to (presumably the document id --
  /// confirm against the code that fills this in).
  int id;
  /// Start of the passage (presumably a token position -- confirm).
  int start;
  /// End of the passage.
  int end;
  /// Score of the passage; PassageScoreVector::sortScores() orders on this.
  double score;
};

/// A std::vector of PassageScore entries that can be ranked by score.
class PassageScoreVector : public std::vector<PassageScore> {
public:
  /// Create an empty score list.
  PassageScoreVector() : std::vector<PassageScore>() {
  }

  /// Sort the entries in place so that the highest score comes first.
  /// std::sort is not stable, so entries with equal scores may end up in
  /// any relative order.
  void sortScores() {
    // A temporary comparator is used rather than the static cmpFn member so
    // that sorting does not depend on cmpFn having an out-of-class
    // definition in some translation unit.
    std::sort(this->begin(), this->end(), PassageScoreDescending());
  }

private:
  /// Strict weak ordering that places larger scores first.
  class PassageScoreDescending {
  public:
    bool operator()(const PassageScore &a, const PassageScore &b) const {
      return a.score > b.score;
    }
  };

  /// Shared comparator instance. No longer used by sortScores(); the
  /// declaration is kept so that an existing out-of-class definition of
  /// PassageScoreVector::cmpFn continues to compile.
  static PassageScoreDescending cmpFn;
};
00054 class PassageRep : public DocumentRep {
00055 public:
00061 PassageRep(DocumentRep &dRep, int d, int p, int o) :
00062 DocumentRep(dRep.getID(), p),
00063 docRep(dRep), docEnd(d), psgSize(p), overlap(o) {
00064
00065 docRep.setDocLength(docLength);
00066 }
00067
00068 #if 0
00069 PassageRep(): DocumentRep(0, 0), docRep(*this) {
00070 }
00071 #endif
00072
00073 #if 0
00074
00075 void startPassageIteration() {
00076 start = 0;
00077 end = psgSize < docEnd ? psgSize : docEnd;
00078 }
00080 bool hasMorePassage() {
00081 return(start < docEnd);
00082 }
00084 void nextPassage() {
00085 int next = start + (end - overlap);
00086 if(next < docEnd)
00087 start = next;
00088 else
00089 start = docEnd;
00090 end = (start + psgSize) < docEnd ? (start + psgSize) : docEnd;
00091 docLength = end - start;
00092
00093 docRep.setDocLength(docLength);
00094 }
00095 #endif
00096
00097 class iterator {
00098 public:
00099 iterator() : start(0), end(0), psgSize(0), overlap(0), docEnd(0),
00100 rep(NULL) {};
00101 iterator(int s, int e, int p, int o, int d, PassageRep *r) : start(s),
00102 end(e),
00103 psgSize(p),
00104 overlap(o),
00105 docEnd(d) {
00106 rep = new PassageRep(*r);
00107 };
00108
00109 virtual ~iterator() {
00110 delete(rep);
00111 };
00113 virtual PassageRep &operator*(){ return *rep;};
00114
00115 virtual iterator& operator++(){
00116 int next = start + (end - overlap);
00117 if(next < docEnd)
00118 start = next;
00119 else
00120 start = docEnd;
00121 end = (start + psgSize) < docEnd ? (start + psgSize) : docEnd;
00122 int docLength = end - start;
00123
00124 rep->setEnd(start, end, docLength);
00125 return *this;
00126 };
00127
00128 virtual iterator& operator++(int){
00129 int next = start + (end - overlap);
00130 if(next < docEnd)
00131 start = next;
00132 else
00133 start = docEnd;
00134 end = (start + psgSize) < docEnd ? (start + psgSize) : docEnd;
00135 int docLength = end - start;
00136
00137 rep->setEnd(start, end, docLength);
00138 return *this;
00139 };
00141 virtual bool operator==(iterator& other)
00142 {
00143 return (other.start == start && other.end == end);
00144 };
00146 virtual bool operator!=(iterator& other)
00147 {
00148 return !(other.start == start && other.end == end);
00149 };
00150 protected:
00152 PassageRep *rep;
00154 int psgSize;
00156 int overlap;
00158 int docEnd;
00160 int start;
00162 int end;
00163 };
00164
00165
00166 PassageRep::iterator begin() {
00167 start = 0;
00168 pEnd = psgSize < docEnd ? psgSize : docEnd;
00169
00170 PassageRep::iterator retval(0, pEnd, psgSize, overlap, docEnd, this);
00171 return retval;
00172 }
00173
00174 PassageRep::iterator end() {
00175 PassageRep::iterator retval (docEnd, docEnd, psgSize, overlap, docEnd,
00176 this);
00177 return retval;
00178 }
00180 void setEnd(int s, int e, int dl) {
00181 start = s;
00182 pEnd = e;
00183 docRep.setDocLength(dl);
00184 }
00191 int passageTF(TERMID_T tid, MatchInfo *matches) const {
00192 int tf = 0;
00193 int pos = 0;
00194 MatchInfo::iterator m = matches->begin();
00195 while (m != matches->end() && pos < pEnd) {
00196 TMatch match = *m;
00197 pos = match.position;
00198
00199 if (pos > docEnd) docEnd = pos + 1;
00200 if (match.tid == tid) {
00201 if (pos >= start && pos < pEnd) {
00202 tf++;
00203 }
00204 }
00205 m++;
00206 }
00207 return tf;
00208 }
00210 int getStart () const {return start;}
00212 int getEnd () const {return pEnd;}
00213
00215 virtual double termWeight(TERMID_T termID, const DocInfo *info) const {
00216 return docRep.termWeight(termID, info);
00217 }
00218
00220 virtual double scoreConstant() const {
00221 return docRep.scoreConstant();
00222 }
00223
00224
00225 protected:
00227 DocumentRep &docRep;
00229 int psgSize;
00231 int overlap;
00233 mutable int docEnd;
00235 mutable int start;
00237 mutable int pEnd;
00238 };
00239
00240 #endif
00241
00242
00243