00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _MMRSUMM_HPP
00013 #define _MMRSUMM_HPP
00014
00015 #include <iomanip>
00016 #include "Summarizer.hpp"
00017 #include "Passage.hpp"
00018 #include "MMRPassage.hpp"
00019 #include "InvFPIndex.hpp"
00020 #include <algorithm>
00021 #include <vector>
00022 #include <string>
00023 using std::string;
00024 using std::vector;
00025
00026 static const string EOS("*eos");
00027 static const string TITLE("*title");
00028 static const string PRONOUN("*pronoun");
00029
00030 #define PSG_LEN 15
00031
00037 class MMRSumm : public Summarizer {
00038
00039 private:
00040 double lambda;
00041 const InvFPIndex* idx;
00042 int summLen;
00043 vector<MMRPassage> doc;
00044 mutable int iterCount;
00045 double maxSims;
00046 MMRPassage* queryPassage;
00047
00048 int autoMMRQuery(void) {
00049 TermInfo* tEntry;
00050 TermInfoList* tList = idx->termInfoListSeq(idx->document(queryPassage->docID));
00051 termCount* storage;
00052 if (hasTITLE(idx, tList)) {
00053
00054 tList->startIteration();
00055 cout << "title found" << endl;
00056 while (tList->hasMore()) {
00057 tEntry = tList->nextEntry();
00058 if ( isTITLE(idx->term(tEntry->termID())) ) {
00059 tEntry = tList->nextEntry();
00060 storage = new termCount;
00061 storage->termID = tEntry->termID();
00062 storage->tf = tEntry->count();
00063 storage->val = tEntry->count();
00064 queryPassage->addTerm(*storage);
00065 }
00066 }
00067 } else {
00068 tList->startIteration();
00069 for (int i=0; i<10; i++) {
00070 if (tList->hasMore()) {
00071 tEntry = tList->nextEntry();
00072 storage = new termCount;
00073 storage->termID = tEntry->termID();
00074 storage->tf = tEntry->count();
00075 storage->val = tEntry->count();
00076 queryPassage->addTerm(*storage);
00077 }
00078 }
00079 }
00080 cout << "Autoquery: ";
00081 showPassage((*queryPassage).getAsVector(), idx);
00082 cout << endl;
00083
00084 return 1;
00085 }
00086
00087 int setMMRQuery(const string &qInfo) {
00088 if (qInfo != "") {
00089 termCount* storage;
00090 storage = new termCount;
00091 storage->termID = idx->term(qInfo);
00092 storage->tf = 1;
00093 storage->val = 1;
00094 queryPassage->addTerm(*storage);
00095 return 1;
00096 }
00097 return autoMMRQuery();
00098 }
00099
00100 public:
00101
00102 MMRSumm(const InvFPIndex* inIdx, int inSummLen = 5) {
00103 idx = inIdx;
00104 summLen = inSummLen;
00105 iterCount = 1;
00106 maxSims = -1.0;
00107 queryPassage = NULL;
00108 lambda = 1.0;
00109 };
00110
00111 virtual void markPassages(int optLen, const string &qInfo);
00112
00113 virtual void addPassage(Passage &psg);
00114
00115 void addDocument(const string &docID);
00116
00117 virtual int fetchPassages(Passage* psgs, int optLen) const;
00118
00119 virtual void summDocument(const string &docID, const int optLen, const string &qInfo);
00120
00121 virtual void scorePassages(const string &qInfo);
00122
00123 virtual void clear(void);
00124
00125 virtual int nextPassage(Passage* psg) const;
00126
00127 virtual void iterClear(void) const;
00128
00129 virtual void outputSumm(void) const;
00130
00131 void findNextPassage(MMRPassage &psg, const InvFPIndex* idx,
00132 const TermInfoList* tList, int eos);
00133
00134 void showPassage(const passageVec* psg, const InvFPIndex* idx) const;
00135
00136 void showMarkedPassages() const ;
00137
00138 int isEOS(const string &check) {
00139 return (check == EOS);
00140 }
00141
00142 int hasEOS(const InvFPIndex* idx, const TermInfoList* tList) {
00143 tList->startIteration();
00144 TermInfo* tEntry;
00145 while (tList->hasMore()) {
00146 tEntry = tList->nextEntry();
00147 if ( isEOS(idx->term(tEntry->termID())) ) return true;
00148 }
00149 return false;
00150 }
00151
00152 int isTITLE(const string & check) {
00153
00154 return (check == TITLE);
00155 }
00156
00157 int hasTITLE(const InvFPIndex* idx, const TermInfoList* tList) {
00158 tList->startIteration();
00159 TermInfo* tEntry;
00160 while (tList->hasMore()) {
00161 tEntry = tList->nextEntry();
00162 if ( isTITLE(idx->term(tEntry->termID())) ) return true;
00163 }
00164 return false;
00165 }
00166
00167 int isPRONOUN(const string &check) {
00168 return (check == PRONOUN);
00169 }
00170
00171 struct compareSW {
00172 double lambda;
00173 compareSW(double l) { lambda = l; }
00174 bool operator()(const MMRPassage p1, const MMRPassage p2) const {
00175 return p1.computeMMR(lambda) > p2.computeMMR(lambda);
00176 }
00177 };
00178
00179 };
00180
00181 #endif