00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _MMRSUMM_HPP
00013 #define _MMRSUMM_HPP
00014
00015 #include <iomanip>
00016 #include "Summarizer.hpp"
00017 #include "Passage.hpp"
00018 #include "MMRPassage.hpp"
00019 #include "Index.hpp"
00020 #include <algorithm>
00021 #include <vector>
00022 #include <string>
00023 using std::string;
00024 using std::vector;
00025
00026 namespace lemur
00027 {
00028 namespace summarization
00029 {
00035 class MMRSumm : public Summarizer {
00036
00037 private:
00038 double lambda;
00039 const lemur::api::Index* idx;
00040 int summLen;
00041 vector<MMRPassage> doc;
00042 mutable int iterCount;
00043 double maxSims;
00044 MMRPassage* queryPassage;
00045
00046 int autoMMRQuery(void) {
00047 lemur::api::TermInfo* tEntry;
00048 lemur::api::TermInfoList* tList = idx->termInfoListSeq(idx->document(queryPassage->docID));
00049
00050
00051
00052
00053 termCount storage;
00054 if (hasTITLE(idx, tList)) {
00055
00056 tList->startIteration();
00057 cout << "title found" << endl;
00058 while (tList->hasMore()) {
00059 tEntry = tList->nextEntry();
00060 if ( isTITLE(idx->term(tEntry->termID())) ) {
00061 tEntry = tList->nextEntry();
00062
00063
00064
00065
00066
00067 storage.termID = tEntry->termID();
00068 storage.tf = tEntry->count();
00069 storage.val = tEntry->count();
00070 queryPassage->addTerm(storage);
00071 }
00072 }
00073 } else {
00074 tList->startIteration();
00075 for (int i=0; i<10; i++) {
00076 if (tList->hasMore()) {
00077 tEntry = tList->nextEntry();
00078
00079
00080
00081
00082
00083 storage.termID = tEntry->termID();
00084 storage.tf = tEntry->count();
00085 storage.val = tEntry->count();
00086 queryPassage->addTerm(storage);
00087 }
00088 }
00089 }
00090 cout << "Autoquery: ";
00091 showPassage((*queryPassage).getAsVector(), idx);
00092 cout << endl;
00093
00094 return 1;
00095 }
00096
00097 int setMMRQuery(const string &qInfo) {
00098 if (qInfo != "") {
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108 termCount storage;
00109 storage.termID = idx->term(qInfo);
00110 storage.tf = 1;
00111 storage.val = 1;
00112 queryPassage->addTerm(storage);
00113 return 1;
00114 }
00115 return autoMMRQuery();
00116 }
00117
00118 public:
00119
00120 MMRSumm(const lemur::api::Index* inIdx, int inSummLen = 5) :
00121 idx(inIdx), summLen(inSummLen), iterCount(1), maxSims(-1.0),
00122 queryPassage(NULL), lambda(1.0) {};
00123
00124 virtual void markPassages(int optLen, const string &qInfo);
00125
00126 virtual void addPassage(Passage &psg);
00127
00128 void addDocument(const string &docID);
00129
00130 virtual int fetchPassages(Passage* psgs, int optLen) const;
00131
00132 virtual void summDocument(const string &docID, const int optLen, const string &qInfo);
00133
00134 virtual void scorePassages(const string &qInfo);
00135
00136 virtual void clear(void);
00137
00138 virtual int nextPassage(Passage* psg) const;
00139
00140 virtual void iterClear(void) const;
00141
00142 virtual void outputSumm(void) const;
00143
00144 void findNextPassage(MMRPassage &psg,
00145 const lemur::api::Index* idx,
00146 const lemur::api::TermInfoList* tList, int eos);
00147
00148 void showPassage(const passageVec* psg,
00149 const lemur::api::Index* idx) const;
00150
00151 void showMarkedPassages() const ;
00152
00153 int isEOS(const string &check) {
00154 return (check == EOS);
00155 }
00156
00157 int hasEOS(const lemur::api::Index* idx,
00158 const lemur::api::TermInfoList* tList) {
00159 tList->startIteration();
00160 lemur::api::TermInfo* tEntry;
00161 while (tList->hasMore()) {
00162 tEntry = tList->nextEntry();
00163 if ( isEOS(idx->term(tEntry->termID())) ) return true;
00164 }
00165 return false;
00166 }
00167
00168 int isTITLE(const string & check) {
00169
00170 return (check == TITLE);
00171 }
00172
00173 int hasTITLE(const lemur::api::Index* idx,
00174 const lemur::api::TermInfoList* tList) {
00175 tList->startIteration();
00176 lemur::api::TermInfo* tEntry;
00177 while (tList->hasMore()) {
00178 tEntry = tList->nextEntry();
00179 if ( isTITLE(idx->term(tEntry->termID())) ) return true;
00180 }
00181 return false;
00182 }
00183
00184 int isPRONOUN(const string &check) {
00185 return (check == PRONOUN);
00186 }
00187
00188 struct compareSW {
00189 double lambda;
00190 compareSW(double l) { lambda = l; }
00191 bool operator()(const MMRPassage p1, const MMRPassage p2) const {
00192 return p1.computeMMR(lambda) > p2.computeMMR(lambda);
00193 }
00194 };
00196 static const string TITLE;
00198 static const string PRONOUN;
00199 };
00200 }
00201 }
00202
00203 #endif