Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

MMRSumm.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #ifndef _MMRSUMM_HPP
00013 #define _MMRSUMM_HPP
00014 
00015 #include <iomanip>
00016 #include "Summarizer.hpp"
00017 #include "Passage.hpp"
00018 #include "MMRPassage.hpp"
00019 #include "InvFPIndex.hpp"
00020 #include <algorithm>
00021 #include <vector>
00022 #include <string>
00023 using std::string;
00024 using std::vector;
00025 
// Marker tokens compared against index term strings by MMRSumm::isEOS,
// MMRSumm::isTITLE and MMRSumm::isPRONOUN respectively.
static const string EOS = "*eos";
static const string TITLE = "*title";
static const string PRONOUN = "*pronoun";

// Not referenced in this header; presumably a passage length in terms
// used by the out-of-line implementation -- TODO confirm against MMRSumm.cpp.
#define PSG_LEN  15
00031 
00037 class MMRSumm : public Summarizer {
00038 
00039 private:
00040   double lambda;
00041   const InvFPIndex* idx;
00042   int summLen;
00043   vector<MMRPassage> doc;
00044   mutable int iterCount;
00045   double maxSims;
00046   MMRPassage* queryPassage;
00047 
00048   int autoMMRQuery(void) {
00049     TermInfo* tEntry;
00050     TermInfoList* tList = idx->termInfoListSeq(idx->document(queryPassage->docID));
00051     termCount* storage;
00052     if (hasTITLE(idx, tList)) {
00053       // use title words
00054       tList->startIteration();
00055       cout << "title found" << endl;
00056       while (tList->hasMore()) {
00057         tEntry = tList->nextEntry();
00058         if ( isTITLE(idx->term(tEntry->termID())) ) {
00059           tEntry = tList->nextEntry(); // the actual word after title token
00060           storage = new termCount;
00061           storage->termID = tEntry->termID();
00062           storage->tf = tEntry->count();
00063           storage->val = tEntry->count();
00064           queryPassage->addTerm(*storage);
00065         }
00066       }      
00067     } else {
00068       tList->startIteration();
00069       for (int i=0; i<10; i++) {
00070         if (tList->hasMore()) {
00071           tEntry = tList->nextEntry();
00072           storage = new termCount;
00073           storage->termID = tEntry->termID();
00074           storage->tf = tEntry->count();
00075           storage->val = tEntry->count();
00076           queryPassage->addTerm(*storage);
00077         }
00078       } 
00079     }
00080     cout << "Autoquery: ";
00081     showPassage((*queryPassage).getAsVector(), idx);
00082     cout << endl;
00083 
00084     return 1;
00085   }
00086 
00087   int setMMRQuery(const string &qInfo) {
00088     if (qInfo != "") {
00089       termCount* storage;
00090       storage = new termCount;
00091       storage->termID = idx->term(qInfo);
00092       storage->tf = 1;
00093       storage->val = 1;
00094       queryPassage->addTerm(*storage);
00095       return 1;
00096     }
00097     return autoMMRQuery();
00098   }
00099 
00100 public:
00101 
00102   MMRSumm(const InvFPIndex* inIdx, int inSummLen = 5) {
00103     idx = inIdx;
00104     summLen = inSummLen;
00105     iterCount = 1;
00106     maxSims = -1.0;
00107     queryPassage = NULL;
00108     lambda = 1.0;
00109   };
00110   
00111   virtual void markPassages(int optLen, const string &qInfo);
00112 
00113   virtual void addPassage(Passage &psg);
00114 
00115   void addDocument(const string &docID);
00116 
00117   virtual int fetchPassages(Passage* psgs, int optLen) const;
00118   
00119   virtual void summDocument(const string &docID, const int optLen, const string &qInfo);
00120 
00121   virtual void scorePassages(const string &qInfo);
00122 
00123   virtual void clear(void);
00124 
00125   virtual int nextPassage(Passage* psg) const;
00126 
00127   virtual void iterClear(void) const;
00128 
00129   virtual void outputSumm(void) const;
00130 
00131   void findNextPassage(MMRPassage &psg, const InvFPIndex* idx, 
00132                        const TermInfoList* tList, int eos);
00133 
00134   void showPassage(const passageVec* psg, const InvFPIndex* idx) const;
00135   
00136   void showMarkedPassages() const ;
00137 
00138   int isEOS(const string &check) {
00139     return (check == EOS);
00140   }
00141   
00142   int hasEOS(const InvFPIndex* idx, const TermInfoList* tList) {
00143     tList->startIteration();
00144     TermInfo* tEntry;
00145     while (tList->hasMore()) {
00146       tEntry = tList->nextEntry();
00147       if ( isEOS(idx->term(tEntry->termID())) ) return true;
00148     }
00149     return false;
00150   }
00151   
00152   int isTITLE(const string & check) {
00153     //    return !strcmp(check, TITLE);
00154     return (check == TITLE);
00155   }
00156   
00157   int hasTITLE(const InvFPIndex* idx, const TermInfoList* tList) {
00158     tList->startIteration();
00159     TermInfo* tEntry;
00160     while (tList->hasMore()) {
00161       tEntry = tList->nextEntry();
00162       if ( isTITLE(idx->term(tEntry->termID())) ) return true;
00163     }
00164     return false;
00165   }
00166   
00167   int isPRONOUN(const string &check) {
00168     return (check == PRONOUN);
00169   }
00170   
00171   struct compareSW {
00172     double lambda;
00173     compareSW(double l) { lambda = l; }
00174     bool operator()(const MMRPassage p1, const MMRPassage p2) const {
00175       return p1.computeMMR(lambda) > p2.computeMMR(lambda);
00176     }
00177   };
00178   
00179 }; // MMRSumm
00180 
00181 #endif

Generated on Wed Nov 3 12:59:00 2004 for Lemur Toolkit by doxygen 1.2.18