Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

OkapiRetMethod.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 #ifndef _OKAPIRETMETHOD_HPP
00014 #define _OKAPIRETMETHOD_HPP
00015 
00016 #include "TextQueryRep.hpp"
00017 #include "ScoreFunction.hpp"
00018 #include "TextQueryRetMethod.hpp"
00019 #include <cmath>
00020 
00022 
/// Parameter bundles and their default values for the Okapi retrieval method.
namespace OkapiParameter {
  /// Term-frequency weighting parameters.
  /// k1 and b are read by OkapiDocRep::BM25TF; k3 is passed to OkapiQueryRep
  /// and used by OkapiQueryTerm::weight.
  struct TFParam {
    double k1;
    double b;
    double k3;
  };

  // The defaults are const on purpose: this is a header, so a non-const
  // `static` would give every translation unit its own writable copy (a write
  // in one file would silently not be seen by any other) and would trigger
  // unused-variable warnings wherever a default is not referenced.
  static const double defaultK1 = 1.2;
  static const double defaultB = 0.75;
  static const double defaultK3 = 7;

  /// Feedback (query expansion) parameters.
  struct FeedbackParam {
    /// Expanded query term TF.
    double expQTF;
    /// NOTE(review): presumably the number of terms added during feedback;
    /// the code that consumes it is not part of this header -- confirm.
    int howManyTerms;
  };

  static const double defaultExpQTF = 0.5;
  static const int defaultHowManyTerms = 50;
}
00044 namespace lemur 
00045 {
00046   namespace retrieval 
00047   {
00048     
00050 
00051     class OkapiQueryTerm : public lemur::api::QueryTerm {
00052     public:
00053       OkapiQueryTerm(lemur::api::TERMID_T termID, double count, int pEstCount, double paramK3) : QueryTerm(termID, count), pEst(pEstCount), k3(paramK3) {
00054       }
00056       virtual int pEstCount() const { return pEst;}
00057 
00059       virtual double weight() const { 
00060         return ((k3+1)*w/(k3+w));
00061       } 
00062     private:
00063       int pEst;
00064       double k3;
00065     };
00066 
00068 
00069     class OkapiScoreFunc : public lemur::api::ScoreFunction {
00070     public:
00071       OkapiScoreFunc(const lemur::api::Index &dbIndex): ind(dbIndex) {}
00072       virtual double matchedTermWeight(const lemur::api::QueryTerm *qTerm, const lemur::api::TextQueryRep *qRep, const lemur::api::DocInfo *info, const lemur::api::DocumentRep *dRep) const;
00073     protected:
00074       const lemur::api::Index &ind;
00075     };
00076 
00077 
00079     class OkapiQueryRep : public ArrayQueryRep {
00080     public:
00081       // initial query constructor, no feedback docs assumed
00082       OkapiQueryRep(const lemur::api::TermQuery &qry, const lemur::api::Index &dbIndex, double paramK3);
00083 
00084       virtual ~OkapiQueryRep() { delete [] pEst; }
00086       int pNormCount() const { return pNorm;}
00088       void setPNormCount(int count) { pNorm = count;}
00090       void incPEst(int wdIndex, int val) { pEst[wdIndex]+=val;}
00091     protected:
00092       virtual lemur::api::QueryTerm *makeQueryTerm(lemur::api::TERMID_T wdIndex, double wdCount) const{
00093         return (new OkapiQueryTerm(wdIndex, wdCount, pEst[wdIndex], k3));
00094       }
00095       double k3;
00096       int pNorm;
00097       int *pEst;
00098     };
00099 
00101 
00102     class OkapiDocRep : public lemur::api::DocumentRep {
00103     public:
00104       OkapiDocRep(lemur::api::DOCID_T docID, const lemur::api::Index &dbIndex, OkapiParameter::TFParam &param) : lemur::api::DocumentRep(docID, dbIndex.docLength(docID)), ind(dbIndex),
00105                                                                                                                  prm(param) {
00106       }
00107       virtual ~OkapiDocRep() { }
00108       virtual double termWeight(lemur::api::TERMID_T termID, const lemur::api::DocInfo *info) const;
00109       double BM25TF(double rawTF, double docLength) const;
00110       virtual double scoreConstant() const { return 0;}
00111     protected:
00112       const lemur::api::Index &ind;
00113       OkapiParameter::TFParam &prm;
00114     };
00115 
00117 
00118     class OkapiRetMethod : public lemur::api::TextQueryRetMethod  {
00119     public:
00120 
00121 
00122       OkapiRetMethod(const lemur::api::Index &dbIndex, lemur::api::ScoreAccumulator &accumulator);
00123 
00124       virtual ~OkapiRetMethod() { delete scFunc;}
00125 
00126       virtual lemur::api::TextQueryRep *computeTextQueryRep(const lemur::api::TermQuery &qry) {
00127         return (new OkapiQueryRep(qry, ind, tfParam.k3));
00128       }
00129 
00130       virtual lemur::api::DocumentRep *computeDocRep(lemur::api::DOCID_T docID) {
00131         return (new OkapiDocRep(docID, ind, tfParam));
00132       }
00133 
00134       virtual lemur::api::ScoreFunction *scoreFunc();
00135 
00137       virtual void updateTextQuery(lemur::api::TextQueryRep &origRep, const lemur::api::DocIDSet &relDocs);
00138 
00139       void setTFParam(OkapiParameter::TFParam &tfWeightParam);
00140 
00141       void setFeedbackParam(OkapiParameter::FeedbackParam &feedbackParam);
00142 
00144       static double RSJWeight(double r, double R, 
00145 
00146                               double n, double N) {
00147         return (log ((r+0.5)*(N-n-R+r+0.5)/((R-r+0.5)*(n-r+0.5))));
00148       }
00149 
00150     protected:
00151       OkapiScoreFunc *scFunc;
00152 
00153       OkapiParameter::TFParam tfParam;
00154       OkapiParameter::FeedbackParam fbParam;
00155 
00156 
00157     };
00158 
00159 
00160     inline void OkapiRetMethod::setTFParam(OkapiParameter::TFParam &tfWeightParam)
00161     {
00162       tfParam = tfWeightParam;
00163     }
00164 
00165     inline void OkapiRetMethod::setFeedbackParam(OkapiParameter::FeedbackParam &feedbackParam)
00166     {
00167       fbParam = feedbackParam;
00168     }
00169 
00170     inline double OkapiDocRep::BM25TF(double rawTF, double docLength) const 
00171     {
00172       return ((prm.k1+1)*rawTF/(rawTF +  prm.k1*(1-prm.b + prm.b*docLength/ind.docLengthAvg())));
00173     }
00174 
00175 
00176     inline double OkapiDocRep::termWeight(lemur::api::TERMID_T termID, const lemur::api::DocInfo *info) const
00177     {
00178       return BM25TF(info->termCount(), ind.docLength(info->docID()));
00179     } 
00180 
00181     inline lemur::api::ScoreFunction *OkapiRetMethod::scoreFunc()
00182     {
00183       return scFunc;
00184     }
00185   }
00186 }
00187 
00188 
00189 #endif /* _OKAPIRETMETHOD_HPP */

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4