00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _PASSAGEREP_HPP
00013 #define _PASSAGEREP_HPP
00014
#include <algorithm>
#include <cstddef>
#include <vector>
#include "MatchInfo.hpp"
00017
00018 namespace lemur
00019 {
00020 namespace retrieval
00021 {
00022
/// Score record for a single passage.
/// Plain aggregate: the member order (id, start, end, score) is part of the
/// interface because callers may brace-initialize it.
struct PassageScore {
  /// Identifier the score belongs to (presumably the document id -- confirm with callers).
  int id;
  /// Start of the scored passage (presumably a token position -- confirm with callers).
  int start;
  /// End of the scored passage (presumably a token position -- confirm with callers).
  int end;
  /// Score assigned to the passage.
  double score;
};
00034
00036 class PassageScoreVector : public vector<PassageScore> {
00037 public:
00038 PassageScoreVector() : vector<PassageScore>() {
00039 }
00041 void sortScores() {
00042 sort(this->begin(), this->end(), cmpFn);
00043 }
00044 private:
00045 class PassageScoreDescending {
00046 public:
00047 bool operator()(const PassageScore & a, const PassageScore & b) {
00048 return a.score > b.score;
00049 }
00050 };
00051 static PassageScoreDescending cmpFn;
00052 };
00054
00060 class PassageRep : public lemur::api::DocumentRep {
00061 public:
00067 PassageRep(lemur::api::DocumentRep &dRep, int d, int p, int o) :
00068 lemur::api::DocumentRep(dRep.getID(), p),
00069 docRep(dRep), docEnd(d), psgSize(p), overlap(o) {
00070
00071 docRep.setDocLength(docLength);
00072 }
00073
00074 #if 0
00075 PassageRep(): DocumentRep(0, 0), docRep(*this) {
00076 }
00077 #endif
00078
00079 #if 0
00080
00081 void startPassageIteration() {
00082 start = 0;
00083 end = psgSize < docEnd ? psgSize : docEnd;
00084 }
00086 bool hasMorePassage() {
00087 return(start < docEnd);
00088 }
00090 void nextPassage() {
00091 int next = start + (end - overlap);
00092 if(next < docEnd)
00093 start = next;
00094 else
00095 start = docEnd;
00096 end = (start + psgSize) < docEnd ? (start + psgSize) : docEnd;
00097 docLength = end - start;
00098
00099 docRep.setDocLength(docLength);
00100 }
00101 #endif
00102
00103 class iterator {
00104 public:
00105 iterator() : start(0), end(0), psgSize(0), overlap(0), docEnd(0),
00106 rep(NULL) {};
00107 iterator(int s, int e, int p, int o, int d, PassageRep *r) : start(s),
00108 end(e),
00109 psgSize(p),
00110 overlap(o),
00111 docEnd(d) {
00112 rep = new PassageRep(*r);
00113 };
00114
00115 virtual ~iterator() {
00116 delete(rep);
00117 };
00119 virtual PassageRep &operator*(){ return *rep;};
00120
00121 virtual iterator& operator++(){
00122 int next = start + (end - overlap);
00123 if(next < docEnd)
00124 start = next;
00125 else
00126 start = docEnd;
00127 end = (start + psgSize) < docEnd ? (start + psgSize) : docEnd;
00128 int docLength = end - start;
00129
00130 rep->setEnd(start, end, docLength);
00131 return *this;
00132 };
00133
00134 virtual iterator& operator++(int){
00135 int next = start + (end - overlap);
00136 if(next < docEnd)
00137 start = next;
00138 else
00139 start = docEnd;
00140 end = (start + psgSize) < docEnd ? (start + psgSize) : docEnd;
00141 int docLength = end - start;
00142
00143 rep->setEnd(start, end, docLength);
00144 return *this;
00145 };
00147 virtual bool operator==(iterator& other)
00148 {
00149 return (other.start == start && other.end == end);
00150 };
00152 virtual bool operator!=(iterator& other)
00153 {
00154 return !(other.start == start && other.end == end);
00155 };
00156 protected:
00158 PassageRep *rep;
00160 int psgSize;
00162 int overlap;
00164 int docEnd;
00166 int start;
00168 int end;
00169 };
00170
00171
00172 PassageRep::iterator begin() {
00173 start = 0;
00174 pEnd = psgSize < docEnd ? psgSize : docEnd;
00175
00176 PassageRep::iterator retval(0, pEnd, psgSize, overlap, docEnd, this);
00177 return retval;
00178 }
00179
00180 PassageRep::iterator end() {
00181 PassageRep::iterator retval (docEnd, docEnd, psgSize, overlap, docEnd,
00182 this);
00183 return retval;
00184 }
00186 void setEnd(int s, int e, int dl) {
00187 start = s;
00188 pEnd = e;
00189 docRep.setDocLength(dl);
00190 }
00197 int passageTF(lemur::api::TERMID_T tid, lemur::api::MatchInfo *matches) const {
00198 int tf = 0;
00199 int pos = 0;
00200 lemur::api::MatchInfo::iterator m = matches->begin();
00201 while (m != matches->end() && pos < pEnd) {
00202 lemur::api::TMatch match = *m;
00203 pos = match.position;
00204
00205 if (pos > docEnd) docEnd = pos + 1;
00206 if (match.tid == tid) {
00207 if (pos >= start && pos < pEnd) {
00208 tf++;
00209 }
00210 }
00211 m++;
00212 }
00213 return tf;
00214 }
00216 int getStart () const {return start;}
00218 int getEnd () const {return pEnd;}
00219
00221 virtual double termWeight(lemur::api::TERMID_T termID, const lemur::api::DocInfo *info) const {
00222 return docRep.termWeight(termID, info);
00223 }
00224
00226 virtual double scoreConstant() const {
00227 return docRep.scoreConstant();
00228 }
00229
00230
00231 protected:
00233 lemur::api::DocumentRep &docRep;
00235 int psgSize;
00237 int overlap;
00239 mutable int docEnd;
00241 mutable int start;
00243 mutable int pEnd;
00244 };
00245 }
00246 }
00247
00248 #endif
00249
00250
00251