00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef _OKAPIRETMETHOD_HPP
00014 #define _OKAPIRETMETHOD_HPP
00015
00016 #include "TextQueryRep.hpp"
00017 #include "ScoreFunction.hpp"
00018 #include "TextQueryRetMethod.hpp"
00019 #include <cmath>
00020
00022
/// Parameter bundles and default values for the Okapi retrieval method.
namespace OkapiParameter {

  /// Term-frequency weighting parameters.
  /// k1 and b are read by OkapiDocRep::BM25TF; k3 is handed to the query
  /// representation and used by OkapiQueryTerm::weight.
  struct TFParam {
    double k1;
    double b;
    double k3;
  };

  /// Parameters for feedback-based query updating.
  struct FeedbackParam {
    /// NOTE(review): presumably the query term frequency given to an
    /// expansion term -- confirm against OkapiRetMethod::updateTextQuery.
    double expQTF;
    /// NOTE(review): presumably the number of terms added during feedback
    /// -- confirm against OkapiRetMethod::updateTextQuery.
    int howManyTerms;
  };

  // Defaults for TFParam, in member order.
  static double defaultK1 = 1.2;
  static double defaultB = 0.75;
  static double defaultK3 = 7.0;

  // Defaults for FeedbackParam, in member order.
  static double defaultExpQTF = 0.5;
  static int defaultHowManyTerms = 50;
}
00044 namespace lemur
00045 {
00046 namespace retrieval
00047 {
00048
00050
00051 class OkapiQueryTerm : public lemur::api::QueryTerm {
00052 public:
00053 OkapiQueryTerm(lemur::api::TERMID_T termID, double count, int pEstCount, double paramK3) : QueryTerm(termID, count), pEst(pEstCount), k3(paramK3) {
00054 }
00056 virtual int pEstCount() const { return pEst;}
00057
00059 virtual double weight() const {
00060 return ((k3+1)*w/(k3+w));
00061 }
00062 private:
00063 int pEst;
00064 double k3;
00065 };
00066
00068
00069 class OkapiScoreFunc : public lemur::api::ScoreFunction {
00070 public:
00071 OkapiScoreFunc(const lemur::api::Index &dbIndex): ind(dbIndex) {}
00072 virtual double matchedTermWeight(const lemur::api::QueryTerm *qTerm, const lemur::api::TextQueryRep *qRep, const lemur::api::DocInfo *info, const lemur::api::DocumentRep *dRep) const;
00073 protected:
00074 const lemur::api::Index &ind;
00075 };
00076
00077
00079 class OkapiQueryRep : public ArrayQueryRep {
00080 public:
00081
00082 OkapiQueryRep(const lemur::api::TermQuery &qry, const lemur::api::Index &dbIndex, double paramK3);
00083
00084 virtual ~OkapiQueryRep() { delete [] pEst; }
00086 int pNormCount() const { return pNorm;}
00088 void setPNormCount(int count) { pNorm = count;}
00090 void incPEst(int wdIndex, int val) { pEst[wdIndex]+=val;}
00091 protected:
00092 virtual lemur::api::QueryTerm *makeQueryTerm(lemur::api::TERMID_T wdIndex, double wdCount) const{
00093 return (new OkapiQueryTerm(wdIndex, wdCount, pEst[wdIndex], k3));
00094 }
00095 double k3;
00096 int pNorm;
00097 int *pEst;
00098 };
00099
00101
00102 class OkapiDocRep : public lemur::api::DocumentRep {
00103 public:
00104 OkapiDocRep(lemur::api::DOCID_T docID, const lemur::api::Index &dbIndex, OkapiParameter::TFParam ¶m) : lemur::api::DocumentRep(docID, dbIndex.docLength(docID)), ind(dbIndex),
00105 prm(param) {
00106 }
00107 virtual ~OkapiDocRep() { }
00108 virtual double termWeight(lemur::api::TERMID_T termID, const lemur::api::DocInfo *info) const;
00109 double BM25TF(double rawTF, double docLength) const;
00110 virtual double scoreConstant() const { return 0;}
00111 protected:
00112 const lemur::api::Index &ind;
00113 OkapiParameter::TFParam &prm;
00114 };
00115
00117
00118 class OkapiRetMethod : public lemur::api::TextQueryRetMethod {
00119 public:
00120
00121
00122 OkapiRetMethod(const lemur::api::Index &dbIndex, lemur::api::ScoreAccumulator &accumulator);
00123
00124 virtual ~OkapiRetMethod() { delete scFunc;}
00125
00126 virtual lemur::api::TextQueryRep *computeTextQueryRep(const lemur::api::TermQuery &qry) {
00127 return (new OkapiQueryRep(qry, ind, tfParam.k3));
00128 }
00129
00130 virtual lemur::api::DocumentRep *computeDocRep(lemur::api::DOCID_T docID) {
00131 return (new OkapiDocRep(docID, ind, tfParam));
00132 }
00133
00134 virtual lemur::api::ScoreFunction *scoreFunc();
00135
00137 virtual void updateTextQuery(lemur::api::TextQueryRep &origRep, const lemur::api::DocIDSet &relDocs);
00138
00139 void setTFParam(OkapiParameter::TFParam &tfWeightParam);
00140
00141 void setFeedbackParam(OkapiParameter::FeedbackParam &feedbackParam);
00142
00144 static double RSJWeight(double r, double R,
00145
00146 double n, double N) {
00147 return (log ((r+0.5)*(N-n-R+r+0.5)/((R-r+0.5)*(n-r+0.5))));
00148 }
00149
00150 protected:
00151 OkapiScoreFunc *scFunc;
00152
00153 OkapiParameter::TFParam tfParam;
00154 OkapiParameter::FeedbackParam fbParam;
00155
00156
00157 };
00158
00159
00160 inline void OkapiRetMethod::setTFParam(OkapiParameter::TFParam &tfWeightParam)
00161 {
00162 tfParam = tfWeightParam;
00163 }
00164
00165 inline void OkapiRetMethod::setFeedbackParam(OkapiParameter::FeedbackParam &feedbackParam)
00166 {
00167 fbParam = feedbackParam;
00168 }
00169
00170 inline double OkapiDocRep::BM25TF(double rawTF, double docLength) const
00171 {
00172 return ((prm.k1+1)*rawTF/(rawTF + prm.k1*(1-prm.b + prm.b*docLength/ind.docLengthAvg())));
00173 }
00174
00175
00176 inline double OkapiDocRep::termWeight(lemur::api::TERMID_T termID, const lemur::api::DocInfo *info) const
00177 {
00178 return BM25TF(info->termCount(), ind.docLength(info->docID()));
00179 }
00180
00181 inline lemur::api::ScoreFunction *OkapiRetMethod::scoreFunc()
00182 {
00183 return scFunc;
00184 }
00185 }
00186 }
00187
00188
00189 #endif