Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

CORIRetMethod.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 #ifndef _CORIRETMETHOD_HPP
00013 #define _CORIRETMETHOD_HPP
00014 
00015 
00016 #include "UnigramLM.hpp"
00017 #include "SimpleKLDocModel.hpp"
00018 #include "TextQueryRetMethod.hpp"
00019 #include "Param.hpp"
00020 #include <math.h>
00021 #include <iostream>
00022 
/*
 * Numeric constants used by the CORI retrieval classes declared below.
 *
 * CSTF* and DOCTF* are two (baseline, factor) pairs for the term-frequency
 * component.  NOTE(review): presumably the CS pair applies to
 * collection-selection indexes and the DOC pair to ordinary document
 * indexes (cf. the isCSIndex constructor argument of CORIRetMethod); the
 * code that chooses between them is not in this header -- confirm in the
 * implementation file.
 *
 * MINBELIEF appears only in the disabled normalisation code of
 * CORIScoreFunc::adjustedScore, as the factor (1 - MINBELIEF).
 */
#define CSTFBASELINE   50
#define CSTFFACTOR     150
#define DOCTFBASELINE  0.5
#define DOCTFFACTOR    1.5
#define MINBELIEF      0.4
00029 namespace lemur 
00030 {
00032   namespace retrieval
00033   {
00034     
00035     class CORIQueryRep : public ArrayQueryRep {
00036     public:
00037       CORIQueryRep(const lemur::api::TermQuery & qry, const lemur::api::Index & dbIndex);
00038       virtual ~CORIQueryRep() {}
00039 
00040     protected:
00041       const lemur::api::Index & ind;
00042     };
00043 
00044     class CORIDocRep : public lemur::api::DocumentRep {
00045     public:
00046       CORIDocRep(lemur::api::DOCID_T docID, const lemur::api::Index & dbIndex, double * cwRatio, 
00047                  double TFfact = 150, double TFbase = 50, 
00048                  const SimpleKLDocModel * smoother = NULL,
00049                  const lemur::langmod::UnigramLM * collectLM = NULL);
00050       virtual ~CORIDocRep() { }
00051       virtual double termWeight(lemur::api::TERMID_T termID, const lemur::api::DocInfo * info) const ;
00052 
00053       virtual double scoreConstant() const { return 0; }
00054 
00055     private:
00056 
00057       const lemur::api::Index & ind;
00058 
00059       int * cwCounts;
00060 
00061       const SimpleKLDocModel * dfSmooth;
00062       const lemur::langmod::UnigramLM * collLM;
00063 
00064       double c05;
00065       double idiv;
00066       double tnorm;
00067     };
00068 
00069 
00070 
00071     class CORIRetMethod : public lemur::api::TextQueryRetMethod {
00072     public:
00073 
00074       CORIRetMethod(const lemur::api::Index & dbIndex, 
00075                     lemur::api::ScoreAccumulator &accumulator, 
00076                     lemur::utility::String cwName, int isCSIndex=0,
00077                     const SimpleKLDocModel ** smoothers = NULL, 
00078                     const lemur::langmod::UnigramLM * collectLM = NULL);
00079       ~CORIRetMethod() { delete scFunc; delete [] cwRatio; }
00080 
00081       virtual lemur::api::TextQueryRep * computeTextQueryRep(const lemur::api::TermQuery & qry) {
00082         return new CORIQueryRep(qry, ind);
00083       }
00084       virtual lemur::api::DocumentRep * computeDocRep(lemur::api::DOCID_T docID) { 
00085         if (dfSmooth != NULL) {
00086           return new CORIDocRep(docID, ind, cwRatio, tffactor, tfbaseline, dfSmooth[docID], collLM);
00087         }
00088         return new CORIDocRep(docID, ind, cwRatio, tffactor, tfbaseline);
00089       }
00090       virtual lemur::api::ScoreFunction * scoreFunc() {
00091         return scFunc;
00092       }
00093 
00094       virtual void scoreCollection(const lemur::api::QueryRep &qry, lemur::api::IndexedRealVector &results);
00095 
00096       virtual void updateTextQuery(lemur::api::TextQueryRep &qryRep, const lemur::api::DocIDSet &relDocs) { }
00097   
00098       void setTFFactor(double tf) { tffactor = tf; }
00099       void setTFBaseline(double tf) { tfbaseline = tf; }
00100 
00101     protected:
00102 
00103       lemur::api::ScoreFunction * scFunc;
00104       const SimpleKLDocModel ** dfSmooth;
00105       const lemur::langmod::UnigramLM * collLM;
00106 
00107       double * cwRatio;
00108       double tffactor;
00109       double tfbaseline;
00110   
00111     };
00112 
00113     class CORIScoreFunc : public lemur::api::ScoreFunction {
00114     public:
00115       CORIScoreFunc(const lemur::api::Index & index) : ind(index) {
00116         rmax=0;
00117         double dc = ind.docCount();
00118         c05 = dc + 0.5;
00119         idiv = log(dc + 1);
00120         //    qr=NULL;
00121         first=0;
00122       }
00123 
00124       virtual double adjustedScore(double origScore, const lemur::api::TextQueryRep * qRep,
00125                                    const lemur::api::DocumentRep * dRep) const {
00126         /*
00127           if (qr != qRep) {
00128           qr = qRep;
00129       
00130           qRep->startIteration();
00131           rmax = 0;
00132           double qw = 0;
00133           while (qRep->hasMore()) {
00134           lemur::api::TERMID_T qtid = qRep->nextTerm()->id();
00135           rmax += (1-MINBELIEF)*(log(c05 / ind.docCount(qtid)) / idiv);
00136           }
00137           }
00138           if ((origScore/rmax)>=1){
00139           cout<<"!!!!!!!!!"<<endl;
00140           cout<<origScore<<" "<<rmax<<" "<<(origScore / rmax)<<endl;
00141           }*/
00142         //return (origScore / rmax);
00143         return origScore;
00144       }
00145 
00146     private:
00147       const lemur::api::Index & ind;
00148       int first;
00149       //TextQueryRep * qr;
00150       double rmax;
00151       double c05;
00152       double idiv;
00153     };
00154   }
00155 }
00156 
00157 
00158 #endif /* _CORIRETMETHOD_HPP */

Generated on Tue Jun 15 11:02:53 2010 for Lemur by doxygen 1.3.4