Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

QueryNode.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002-2003 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 /*
00012   author: fff, dmf
00013   * 03/31/2003 -- Split QueryNode out of StructQueryRep, refactor some 
00014   * code out of rep into node classes (more OOP that way).
00015 */
00016 
00017 #ifndef _QUERYNODE_HPP
00018 #define _QUERYNODE_HPP
00019 #include "StructQryDocRep.hpp"
00020 #include "ProxInfo.hpp"
00021 
00022 // forward declaration
00023 class QueryNode;
00024 
00028 class QnList {
00029 public:
00030   QnList() {}
00031   ~QnList(); 
00033   void startIteration() {i = 0;}
00035   bool hasMore() {  return (i < qnList.size()); }
00037   QueryNode * nextNode()  {QueryNode *qn = qnList[i++]; return qn; }
00039   const QueryNode * getNode(int j) const{QueryNode *qn = qnList[j]; return qn; }
00041   int size() const{return qnList.size();}
00043   void push_back(QueryNode *qn){qnList.push_back(qn);}
00045   QueryNode *popNode() { 
00046     QueryNode *qn = qnList[i]; 
00047     qnList[i++] = NULL;
00048     return qn;  
00049   }
00050 private:
00052   mutable int i;
00054   vector<QueryNode *> qnList;
00055 };
00056 //------------------------------------------------------------
00057 //      Abstract Interface for Query Nodes 
00058 //------------------------------------------------------------
00059 
00060 
00065 class QueryNode {
00066 public:
00068   QueryNode(int id, double weight) : 
00069     w(weight),it(id),ch(NULL),entries(0),nextDoc(0),dCnt(0),
00070     dList(NULL), proxList(NULL), dw(0) {}
00072   QueryNode() : w(1),it(0),ch(NULL),entries(0),nextDoc(0),dCnt(0),
00073                 dList(NULL), proxList(NULL), dw(0){}
00075   QueryNode(double weight, double dWeight = 0) : w(weight), it(0), ch(NULL),
00076                                                  entries(0), nextDoc(0),
00077                                                  dCnt(0),proxList(NULL),
00078                                                  dList(NULL), 
00079                                                  dw(dWeight) {}
00081   virtual ~QueryNode() {
00082     delete[](dList);
00083     delete(proxList);
00084     delete(ch);
00085   }
00087   const QnList *children() const { return ch;}
00089   void setChildren(QnList *cl) {ch = cl;}
00091   int id() const { return it;}
00093   double weight() const { return w; }
00094   // update weight
00095   void setWeight(double wt) { w = wt; }
00096   // update entries count
00097   void setEntries(int cnt) { entries = cnt; }
00098 
00100   virtual void copyDocList(int len, int tf, const DocInfoList *dl, int dc) {
00101   }
00103   virtual void updateDocList(int numDocs) = 0;
00105   virtual double eval(const DocumentRep *dRep) const = 0;
00106 
00108   bool isProxNode() const { return proxList != NULL; }
00110   void startProxIteration() const {if (proxList) proxList->startIteration();}
00112   bool hasMoreProx() const {return proxList && proxList->hasMore();}
00114   bool nextProxItem() const {return proxList && proxList->nextDoc();}
00117   bool nextProxItem(DOCID_T did) const{
00118     return proxList && proxList->nextDoc(did);
00119   }
00120 
00122   int dCnt;
00124   bool *dList;
00126   mutable DOCID_T nextDoc;
00128   ProxInfo * proxList;
00129 
00130 protected:  
00133   void transformPassageOps();
00135   QnList *ch;
00137   int entries;
00139   int it;
00141   double w; 
00143   double dw;
00144 
00146   void unionDocList(int numDocs);
00148   void intersectDocList(int numDocs);
00149 };
00150 
00152 class BeliefNode : public QueryNode {
00153 public:
00154   BeliefNode(double wt) : QueryNode(wt) { }
00155   BeliefNode(int id, double weight) : QueryNode(id, weight) { }  
00156   BeliefNode(double wt, double dbelief) : QueryNode(wt, dbelief) { }
00157   virtual ~BeliefNode() { }
00159   virtual void updateDocList(int numDocs) {
00160     unionDocList(numDocs);   
00161   }
00162 };
00163 
00164 
00166 class ProxNode : public QueryNode {
00167 public:
00168   ProxNode(double wt) : QueryNode(wt) {}
00169   
00170   ProxNode(int id, double weight) :  QueryNode(id, weight) { }  
00171   ProxNode(double w, double d) : QueryNode(w, d), winSize(0) {}
00172   ProxNode(int size, double w, double d) : QueryNode(w, d), winSize(size) {}
00173   virtual ~ProxNode() { }
00175   virtual double eval(const DocumentRep *dRep) const {
00176     return proximityScore(dRep, dw);
00177   }
00179   virtual void updateDocList(int numDocs) {
00180     intersectDocList(numDocs);   
00181   }
00182 
00183 protected:
00185   int winSize;
00186 private:
00190   double proximityScore(const DocumentRep *dR, double defaultScore) const {
00191     const StructQryDocRep *dRep = (const StructQryDocRep *)dR;
00192     int tf = 0;
00193     double score;
00194     if(dRep->did < nextDoc) {
00195       return defaultScore;
00196     }
00197     // Skip to next doc id that we can score (> or == to current).
00198     if (proxList->nextDoc(dRep->did)) {
00199       double idf = dRep->computeIdfScore(dCnt);
00200       // compute proximity score for whole doc
00201       tf = proxList->count();
00202       score = dRep->beliefScore(tf, idf);
00203       // advance the prox pointer.
00204       if (proxList->nextDoc()) {
00205         nextDoc = proxList->id();
00206       } else {
00207         // No more prox entries.
00208         nextDoc = INT_MAX;
00209       }
00210     } else {
00211       score = defaultScore;
00212     }
00213     return score;
00214   }
00215 };
00216 
00217 
00220 class SumQnode : public BeliefNode {
00221 public:
00222   SumQnode(double wt) : BeliefNode(wt){}
00223   virtual ~SumQnode() {}
00226   virtual double eval(const DocumentRep *dRep) const{
00227     double sum = 0;
00228     int count = 0;
00229     const QueryNode *qn;
00230     ch->startIteration();
00231     while(ch->hasMore()) {
00232       qn = ch->nextNode();
00233       sum += qn->eval(dRep);
00234       count++;
00235     }
00236     //    return sum/entries;
00237     return sum/count;
00238   }
00239 };
00240 
00243 class WsumQnode : public BeliefNode {
00244 public:
00245   WsumQnode(double w) : BeliefNode(w) {}
00246   virtual ~WsumQnode() {}
00249   virtual double eval(const DocumentRep *dRep) const{
00250     double sum = 0;
00251     double total_wt = 0;
00252     const QueryNode *qn;
00253     double wt;
00254     ch->startIteration();
00255     while(ch->hasMore()) {
00256       qn = ch->nextNode();
00257       wt = qn->weight(); 
00258       total_wt += wt;
00259       sum += wt * qn->eval(dRep);
00260     }
00261     if(total_wt > 0) // normalized by the sum of the weights
00262       sum /= total_wt;
00263     // the belief is scaled by the weight 'w' of itself.
00264     return sum;
00265   }
00266 };
00267 
00268 
00271 class AndQnode : public BeliefNode {
00272 public:
00273   AndQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00274   virtual ~AndQnode() {}
00278   virtual double eval(const DocumentRep *dRep) const{
00279     double prod = 1;
00280     const QueryNode *qn;
00281     double wt;
00282     ch->startIteration();
00283     while(ch->hasMore()) {
00284       qn = ch->nextNode();
00285       wt = qn->eval(dRep);
00286       if(wt > dw)
00287         prod *= wt;
00288       else
00289         prod *= dw;
00290     }
00291     return prod;
00292   }
00293 };
00294 
00298 class OrQnode : public BeliefNode {
00299 public:
00300   OrQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00301   virtual ~OrQnode() {}
00304   virtual double eval(const DocumentRep *dRep) const {
00305     double prod = 1.0;
00306     const QueryNode *qn;
00307     double wt;
00308     ch->startIteration();
00309     while(ch->hasMore()) {
00310       qn = ch->nextNode();
00311       wt = qn->eval(dRep);
00312       if(wt > dw)
00313         prod *= (1.0 - wt);
00314     }
00315     return (1.0 - prod);
00316   }
00317 };
00318 
00321 class NotQnode : public BeliefNode {
00322 public:
00323   NotQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00324   virtual ~NotQnode() {}
00326   virtual double eval(const DocumentRep *dRep) const{
00327     // inverting the belief in the only child node
00328     const QueryNode *qn;
00329     ch->startIteration();
00330     qn = ch->nextNode();
00331     return (1.0 - qn->eval(dRep));
00332   }
00333 };
00334 
00337 class MaxQnode : public BeliefNode {
00338 public:
00339   MaxQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00340   virtual ~MaxQnode() {}
00343   virtual double eval(const DocumentRep *dRep) const{
00344     double mx = dw;
00345     const QueryNode *qn;
00346     double wt;
00347     ch->startIteration();
00348     while(ch->hasMore()) {
00349       qn = ch->nextNode();
00350       wt = qn->eval(dRep);
00351       if(wt > mx) 
00352         mx = wt;
00353     }
00354     return mx;
00355   }
00356 };
00357 
00361 class BandQnode : public BeliefNode {
00362 public:
00363   BandQnode(double dbelief, double w) : BeliefNode(w, dbelief) {}
00364   virtual ~BandQnode() {}
00368   virtual double eval(const DocumentRep *dRep) const{
00369     double prod = 1.0;
00370     const QueryNode *qn;
00371     double wt;
00372     StructQryDocRep * myRep = (StructQryDocRep *)dRep;
00373     DOCID_T did = myRep->did;
00374     ch->startIteration();
00375     while(ch->hasMore()) {
00376       qn = ch->nextNode();
00377       // advance child prox entry to this document
00378       if (qn->hasMoreProx() && qn->nextProxItem(did))
00379         qn->nextDoc = did; // update nextDoc for term node eval
00380       wt = qn->eval(dRep);
00381       if(wt > dw) 
00382         prod *= wt;
00383       else 
00384         return 0;
00385     }
00386     return prod;
00387   }
00388   virtual void updateDocList(int numDocs) {
00389     intersectDocList(numDocs); // different from superclass.
00390   }
00391 };
00392 
00397 class BandNotQnode : public BeliefNode {
00398 public:
00399   BandNotQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00400   virtual ~BandNotQnode() {}
00404   virtual double eval(const DocumentRep *dRep) const{
00405     double child1Wt;
00406     double child2Wt;
00407     // boolean_and_not consider only two children
00408     const QueryNode *qn;
00409     ch->startIteration();
00410     qn = ch->nextNode();
00411     child1Wt = qn->eval(dRep);
00412     qn = ch->nextNode();
00413     child2Wt = qn->eval(dRep);
00414     if(child2Wt > dw)
00415       return 0;
00416     else
00417       return (child1Wt * (1.0 - child2Wt));
00418   }
00419 };
00420 
00421 
00426 class FiltRejQnode : public BeliefNode {
00427 public:
00428   FiltRejQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00429   virtual ~FiltRejQnode() {}
00434   virtual double eval(const DocumentRep *dRep) const{
00435     double child1Wt;
00436     double child2Wt;
00437     // filter_reject consider only two children
00438     const QueryNode *qn;
00439     ch->startIteration();
00440     qn = ch->nextNode();
00441     child1Wt = qn->eval(dRep);
00442     qn = ch->nextNode();
00443     child2Wt = qn->eval(dRep);
00444     if(child1Wt > dw && child2Wt <= dw)
00445       return child1Wt;
00446     else
00447       return dw;
00448   }
00449 };
00450 
00455 class FiltReqQnode : public BeliefNode {
00456 public:
00457   FiltReqQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00458   virtual ~FiltReqQnode() {}
00462   virtual double eval(const DocumentRep *dRep) const{
00463     double child1Wt;
00464     double child2Wt;
00465     // filter_require consider only two children
00466     const QueryNode *qn;
00467     ch->startIteration();
00468     qn = ch->nextNode();
00469     child1Wt = qn->eval(dRep);
00470     qn = ch->nextNode();
00471     child2Wt = qn->eval(dRep);
00472     if(child1Wt > dw && child2Wt > dw)
00473       return child1Wt;
00474     else
00475       return dw;
00476   }
00477   virtual void updateDocList(int numDocs) {
00478     intersectDocList(numDocs); // different from superclass.
00479   }
00480 };
00481 
00483 class TermQnode : public ProxNode {
00484 public:
00485   TermQnode(int id, double weight) : ProxNode(id, weight) { }
00486   virtual ~TermQnode() {}
00488   virtual double eval(const DocumentRep *dRep) const{ 
00489     StructQryDocRep * myRep = (StructQryDocRep *)dRep;
00490     DOCID_T did = myRep->did;
00491     double freq = 0.0;
00492     if (nextDoc == did) {
00493       freq = (double)proxList->count();
00494       if (hasMoreProx()) {
00495         // advance the prox pointer.
00496         nextProxItem();
00497         nextDoc = proxList->id();
00498       } else {
00499         // No more prox entries.
00500         nextDoc = INT_MAX;
00501       }
00502     }
00503     return(myRep->termWeight(it, freq, dCnt));
00504   }
00506   virtual void copyDocList(int len, int tf, const DocInfoList *dl, int dc);
00507 };
00508 
00511 class OdnQNode : public ProxNode {
00512 public:
00513   OdnQNode(int size, double w, double d) : ProxNode(size, w, d){}
00514   virtual ~OdnQNode() {}
00515   virtual void updateDocList(int numDocs) {
00516     ProxNode::updateDocList(numDocs);   // use superclass method
00517     orderedProxList(numDocs);
00518   }
00519   
00520 private:
00522   void orderedProxList(int numDocs);
00524   bool foundOrderedProx(int bpos, int wsize, const QnList *cl, int ith);
00525 };
00526 
00529 class UwnQNode : public ProxNode {
00530 public:
00531   UwnQNode(int size, double w, double d) : ProxNode(size, w, d) {}
00532   virtual ~UwnQNode() {}
00533   virtual void updateDocList(int numDocs) {
00534     ProxNode::updateDocList(numDocs);   // use superclass method
00535     unorderedProxList(numDocs);
00536   }
00537 private:
00539   void unorderedProxList(int numDocs);
00541   bool findUnorderedWin(const QueryNode *cqn, QnList *cl, int winSize);
00542 };
00543 
00544 
00546 // fix to be WPARSUMN 
00547 // This is a prox operator with embedded belief operators spliced out.
00550 class PassageQNode : public ProxNode {
00551 public:
00552   PassageQNode(int size, double w) : ProxNode(w){ winSize = size; }
00553   virtual ~PassageQNode() {}
00554 
00559   virtual double eval(const DocumentRep *dR) const{
00560     StructQryDocRep *dRep = (StructQryDocRep *)dR;
00561     double maxScore = 0;
00562     double score;      
00563     dRep->startPassageIteration(winSize);
00564     while(dRep->hasMorePassage()) {
00565       score = passageScore(dRep);
00566       if(score > maxScore) {
00567         maxScore = score;
00568       }
00569       dRep->nextPassage();
00570     }
00571     return maxScore;
00572   }
00573 
00574   virtual void updateDocList(int numDocs) {
00575     unionDocList(numDocs); // different from superclass.
00576     transformPassageOps();
00577   }
00578 private:
00581   double passageScore(const StructQryDocRep *dRep) const;
00582 };
00583 
00586 class SynQNode : public ProxNode {
00587 public:
00588   SynQNode(double w, double d) : ProxNode(w, d) {}
00589   virtual ~SynQNode() {}
00590   virtual void updateDocList(int numDocs) {
00591     unionDocList(numDocs); // different from superclass.
00592     synonymProxList();
00593   }
00594   
00595 private:
00597   void synonymProxList();
00598 };
00599 
00602 class PropQNode : public OdnQNode {
00603 public:
00604   PropQNode(double w, double d) : OdnQNode(0, w, d){}
00605   virtual ~PropQNode() {}
00606 };
00607 
00608 #endif

Generated on Wed Nov 3 12:59:02 2004 for Lemur Toolkit by doxygen1.2.18