Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

TFIDFRetMethod.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 
00014 #ifndef _TFIDFRETMETHOD_HPP
00015 #define _TFIDFRETMETHOD_HPP
00016 
00017 #include "TextQueryRetMethod.hpp"
00018 
      /// Parameter types and default values used by the TF-IDF retrieval classes
      /// declared later in this header.
      /// NOTE(review): the defaults below are non-const `static` variables defined
      /// in a header, so each translation unit that includes this file gets its
      /// own independent, mutable copy. Confirm nothing relies on them being shared.
00020 namespace TFIDFParameter {
00021 
        /// Selector stored in WeightParam::tf. Judging by the names these are
        /// raw count, log-scaled count and BM25-style TF; the code that
        /// interprets the value is not in this header -- TODO confirm.
00022   enum TFMethod  {RAWTF=0, LOGTF=1, BM25=2};
        /// TF weighting configuration. TFIDFRetMethod keeps one instance for
        /// queries (qryTFParam) and one for documents (docTFParam).
00023   struct WeightParam {
00024     TFMethod tf;      ///< which TF weighting to use
00025     double bm25K1;    ///< presumably the k1 argument of BM25TF -- TODO confirm
00026     double bm25B;     ///< presumably the b argument of BM25TF -- TODO confirm
00027   };
        /// Feedback configuration stored by TFIDFRetMethod::setFeedbackParam.
        /// How the fields are consumed is defined outside this header.
00028   struct FeedbackParam {
00029     int howManyTerms; ///< presumably the number of feedback terms to use -- TODO confirm
00030     double posCoeff;  ///< presumably the weight given to feedback terms -- TODO confirm
00031   };    
        // Default values. None of them is referenced inside this header; by name
        // they pair with the document/query WeightParam and FeedbackParam fields.
00032   static double defaultDocK1=1;
00033   static double defaultDocB = 0.5;
00034   static double defaultQryK1 = 1;
00035   static double defaultQryB = 0;
00036   static int defaultHowManyTerms = 50;
00037   static double defaultPosCoeff = 0.5;
00038 }
00039 namespace lemur 
00040 {
00041   namespace retrieval 
00042   {
00043     
          /// Query representation used by the TF-IDF method; this is the type
          /// TFIDFRetMethod::computeTextQueryRep allocates and returns.
          /// ArrayQueryRep is not declared in this header (presumably it comes in
          /// through TextQueryRetMethod.hpp).
00045     class TFIDFQueryRep : public ArrayQueryRep {
00046     public:
            /// Declared here, defined elsewhere.
            /// @param qry       the term query to represent
            /// @param dbIndex   the index the query is run against
            /// @param idfValue  pointer to idf values (presumably indexed by term id,
            ///                  as TFIDFDocRep::termWeight does -- TODO confirm)
            /// @param param     TF weighting parameters; taken by reference
00047       TFIDFQueryRep(const lemur::api::TermQuery &qry, const lemur::api::Index &dbIndex, double *idfValue, TFIDFParameter::WeightParam &param);
00048 
            /// Empty destructor: nothing held by this class is released here, so
            /// the idf array is not owned by the query representation.
00049       virtual ~TFIDFQueryRep() {}
00050 
            /// Maps a raw query term frequency to a weight. Defined elsewhere.
00051       double queryTFWeight(const double rawTF) const;
00052     protected:
            // All three members are a reference or a raw pointer, so the objects
            // they refer to must outlive this query representation.
00053       TFIDFParameter::WeightParam &prm;
00054       double *idf;
00055       const lemur::api::Index &ind;
00056     };
00057 
          /// Document representation used by the TF-IDF method; this is the type
          /// TFIDFRetMethod::computeDocRep allocates and returns.
00059     class TFIDFDocRep : public lemur::api::DocumentRep {
00060     public:
            /// Initialises the DocumentRep base with the document id and its
            /// length as reported by dbIndex.docLength(docID), then stores the
            /// index reference, the weighting-parameter reference and the idf
            /// pointer. Nothing is copied, so all three must outlive this object.
00061       TFIDFDocRep(lemur::api::DOCID_T docID, const lemur::api::Index &dbIndex, double *idfValue,
00062                   TFIDFParameter::WeightParam &param) : 
00063         lemur::api::DocumentRep(docID, dbIndex.docLength(docID)), ind(dbIndex), prm(param), idf(idfValue) {
00064       }
00065       virtual ~TFIDFDocRep() { }
            /// Returns idf[termID] multiplied by the TF weight of the term's count
            /// in this document (info->termCount()).
            /// NOTE(review): termID is used as an array index without a bounds
            /// check and info is dereferenced without a null check; callers must
            /// pass valid values.
00066       virtual double termWeight(lemur::api::TERMID_T termID, const lemur::api::DocInfo *info) const{ 
00067         return (idf[termID]*docTFWeight(info->termCount())); 
00068       }
            /// Always returns 0.
00069       virtual double scoreConstant() const { return 0;}
00070 
            /// Maps a raw document term frequency to a weight. Defined elsewhere.
00071       double docTFWeight(const double rawTF) const;
00072     private:
00073 
            // Non-owning: the destructor is empty, so none of these is released here.
00074       const lemur::api::Index & ind;
00075       TFIDFParameter::WeightParam &prm;
00076       double *idf;
00077     };
00078 
00079 
00081 
          /// TF-IDF retrieval method. Creates TFIDFQueryRep / TFIDFDocRep objects
          /// that share this object's idf array and weighting parameters, and
          /// exposes the score function it holds.
          /// NOTE(review): the destructor frees idfV and scFunc, but this class
          /// declares no copy constructor or copy assignment. Unless the base class
          /// already prevents copying, copying an instance would lead to a double
          /// delete -- verify against TextQueryRetMethod.
00082     class TFIDFRetMethod : public lemur::api::TextQueryRetMethod {
00083     public:
00084 
            /// Declared here, defined elsewhere.
00085       TFIDFRetMethod(const lemur::api::Index &dbIndex, lemur::api::ScoreAccumulator &accumulator);
            /// Releases the idf array (array delete) and the score function object.
00086       virtual ~TFIDFRetMethod() {delete [] idfV; delete scFunc;}
00087 
            /// Allocates a TFIDFQueryRep with `new`, passing the idf array and the
            /// query-side TF parameters. The returned object refers to this
            /// object's members, so it must not be used after this object is
            /// destroyed. `ind` is not declared in this class (presumably inherited
            /// from the base class). Releasing the result is presumably the
            /// caller's job -- TODO confirm.
00088       virtual lemur::api::TextQueryRep *computeTextQueryRep(const lemur::api::TermQuery &qry) {
00089         return (new TFIDFQueryRep(qry, ind, idfV, qryTFParam));
00090       }
00091 
            /// Allocates a TFIDFDocRep with `new` for docID, passing the idf array
            /// and the document-side TF parameters. The same lifetime caveat applies
            /// as for computeTextQueryRep.
00092       virtual lemur::api::DocumentRep *computeDocRep(lemur::api::DOCID_T docID) { 
00093         return (new TFIDFDocRep(docID, ind, idfV, docTFParam));
00094       }
            /// Returns the score function pointer held by this object. The
            /// destructor deletes that object, so callers must not free it.
00095       virtual lemur::api::ScoreFunction *scoreFunc() {
00096         return (scFunc);
00097       }
00098 
00099 
            /// Updates qryRep using the document set relDocs. Defined elsewhere;
            /// presumably this is where fbParam is consumed -- TODO confirm.
00100       virtual void updateTextQuery(lemur::api::TextQueryRep &qryRep, 
00101                                    const lemur::api::DocIDSet &relDocs);
00102 
            // The three setters below copy their argument into the matching member.
            // NOTE(review): the parameters are non-const references although the
            // inline definitions only read them.
00103       void setDocTFParam(TFIDFParameter::WeightParam &docTFWeightParam);
00104 
00105       void setQueryTFParam(TFIDFParameter::WeightParam &queryTFWeightParam);
00106 
00107       void setFeedbackParam(TFIDFParameter::FeedbackParam &feedbackParam);
00108 
            /// Computes k1*rawTF / (rawTF + k1*(1 - b + b*docLen/avgDocLen)).
00109       static double BM25TF(const double rawTF, const double k1, const double b, 
00110                            const double docLen, const double avgDocLen);
00111 
00112     protected:
00113       double *idfV;                       ///< idf array; freed with delete[] in the destructor
00114       lemur::api::ScoreFunction *scFunc;  ///< score function; freed with delete in the destructor
00115   
00117 
00118 
00119       TFIDFParameter::WeightParam qryTFParam;   ///< handed to every TFIDFQueryRep created
00120       TFIDFParameter::WeightParam docTFParam;   ///< handed to every TFIDFDocRep created
00121       TFIDFParameter::FeedbackParam fbParam;    ///< set by setFeedbackParam
00122 
00124 
00125     };
00126 
00127 
          /// Copies the supplied weighting parameters into docTFParam. TFIDFDocRep
          /// objects created by computeDocRep hold a reference to docTFParam.
          /// The argument is only read, although it is taken by non-const reference.
00128     inline void TFIDFRetMethod::setDocTFParam(TFIDFParameter::WeightParam &docTFWeightParam)
00129     {
00130       docTFParam = docTFWeightParam;
00131     }
00132 
00133 
00134 
          /// Copies the supplied weighting parameters into qryTFParam, the member
          /// that computeTextQueryRep passes to each TFIDFQueryRep it creates.
          /// The argument is only read, although it is taken by non-const reference.
00135     inline void TFIDFRetMethod::setQueryTFParam(TFIDFParameter::WeightParam &queryTFWeightParam)
00136     {
00137       qryTFParam = queryTFWeightParam;
00138     }
00139 
00140 
          /// Copies the supplied feedback parameters into fbParam.
          /// The argument is only read, although it is taken by non-const reference.
00141     inline void TFIDFRetMethod::setFeedbackParam(TFIDFParameter::FeedbackParam &feedbackParam)
00142     {
00143       fbParam = feedbackParam;
00144     }
00145 
00146 
00147 
          /// BM25-style term-frequency weight:
          ///   k1*rawTF / (rawTF + k1*(1 - b + b*docLen/avgDocLen)).
          /// @param rawTF      raw term count
          /// @param k1         k1 parameter of the formula
          /// @param b          length-normalisation parameter of the formula
          /// @param docLen     length of the document (or query) being weighted
          /// @param avgDocLen  average length used for normalisation
          /// NOTE(review): there is no guard for avgDocLen == 0 or for a zero
          /// denominator (e.g. rawTF == 0 with k1 == 0). Such inputs produce
          /// inf/NaN under IEEE floating point -- confirm callers never pass them.
00148     inline double TFIDFRetMethod ::BM25TF(const double rawTF, const double k1, const double b, 
00149                                           const double docLen, const  double avgDocLen)
00150     {
            // x is the denominator: raw TF plus the length-normalised k1 term.
00151       double x= rawTF+k1*(1-b+b*docLen/avgDocLen);
00152       return (k1*rawTF/x);
00153     }
00154  
00155   }
00156 }
00157 
00158 
00159 #endif /* _TFIDFRETMETHOD_HPP */

Generated on Tue Jun 15 11:02:56 2010 for Lemur by doxygen 1.3.4