00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _CORIRETMETHOD_HPP
00013 #define _CORIRETMETHOD_HPP
00014
00015
00016 #include "UnigramLM.hpp"
00017 #include "SimpleKLDocModel.hpp"
00018 #include "TextQueryRetMethod.hpp"
00019 #include "Param.hpp"
00020 #include <math.h>
00021 #include <iostream>
00022
// Numeric constants for CORI scoring. None of these macros is referenced by
// the declarations visible in this header; the meanings noted below are
// inferred from the macro names only -- TODO confirm against the
// implementation file.

// 50 and 150 are the same values as the default TFbase / TFfact arguments of
// the CORIDocRep constructor. Presumably the term-frequency baseline and
// factor used when ranking collections ("CS" = collection selection?).
00024 #define CSTFBASELINE 50
00025 #define CSTFFACTOR 150
// Presumably the term-frequency baseline and factor used when ranking
// individual documents rather than collections -- verify.
00026 #define DOCTFBASELINE 0.5
00027 #define DOCTFFACTOR 1.5
// Presumably the minimum (default) belief term of the CORI formula -- verify.
00028 #define MINBELIEF 0.4
00029 namespace lemur
00030 {
00032 namespace retrieval
00033 {
00034
00035 class CORIQueryRep : public ArrayQueryRep {
00036 public:
00037 CORIQueryRep(const lemur::api::TermQuery & qry, const lemur::api::Index & dbIndex);
00038 virtual ~CORIQueryRep() {}
00039
00040 protected:
00041 const lemur::api::Index & ind;
00042 };
00043
00044 class CORIDocRep : public lemur::api::DocumentRep {
00045 public:
00046 CORIDocRep(lemur::api::DOCID_T docID, const lemur::api::Index & dbIndex, double * cwRatio,
00047 double TFfact = 150, double TFbase = 50,
00048 const SimpleKLDocModel * smoother = NULL,
00049 const lemur::langmod::UnigramLM * collectLM = NULL);
00050 virtual ~CORIDocRep() { }
00051 virtual double termWeight(lemur::api::TERMID_T termID, const lemur::api::DocInfo * info) const ;
00052
00053 virtual double scoreConstant() const { return 0; }
00054
00055 private:
00056
00057 const lemur::api::Index & ind;
00058
00059 int * cwCounts;
00060
00061 const SimpleKLDocModel * dfSmooth;
00062 const lemur::langmod::UnigramLM * collLM;
00063
00064 double c05;
00065 double idiv;
00066 double tnorm;
00067 };
00068
00069
00070
00071 class CORIRetMethod : public lemur::api::TextQueryRetMethod {
00072 public:
00073
00074 CORIRetMethod(const lemur::api::Index & dbIndex,
00075 lemur::api::ScoreAccumulator &accumulator,
00076 lemur::utility::String cwName, int isCSIndex=0,
00077 const SimpleKLDocModel ** smoothers = NULL,
00078 const lemur::langmod::UnigramLM * collectLM = NULL);
00079 ~CORIRetMethod() { delete scFunc; delete [] cwRatio; }
00080
00081 virtual lemur::api::TextQueryRep * computeTextQueryRep(const lemur::api::TermQuery & qry) {
00082 return new CORIQueryRep(qry, ind);
00083 }
00084 virtual lemur::api::DocumentRep * computeDocRep(lemur::api::DOCID_T docID) {
00085 if (dfSmooth != NULL) {
00086 return new CORIDocRep(docID, ind, cwRatio, tffactor, tfbaseline, dfSmooth[docID], collLM);
00087 }
00088 return new CORIDocRep(docID, ind, cwRatio, tffactor, tfbaseline);
00089 }
00090 virtual lemur::api::ScoreFunction * scoreFunc() {
00091 return scFunc;
00092 }
00093
00094 virtual void scoreCollection(const lemur::api::QueryRep &qry, lemur::api::IndexedRealVector &results);
00095
00096 virtual void updateTextQuery(lemur::api::TextQueryRep &qryRep, const lemur::api::DocIDSet &relDocs) { }
00097
00098 void setTFFactor(double tf) { tffactor = tf; }
00099 void setTFBaseline(double tf) { tfbaseline = tf; }
00100
00101 protected:
00102
00103 lemur::api::ScoreFunction * scFunc;
00104 const SimpleKLDocModel ** dfSmooth;
00105 const lemur::langmod::UnigramLM * collLM;
00106
00107 double * cwRatio;
00108 double tffactor;
00109 double tfbaseline;
00110
00111 };
00112
00113 class CORIScoreFunc : public lemur::api::ScoreFunction {
00114 public:
00115 CORIScoreFunc(const lemur::api::Index & index) : ind(index) {
00116 rmax=0;
00117 double dc = ind.docCount();
00118 c05 = dc + 0.5;
00119 idiv = log(dc + 1);
00120
00121 first=0;
00122 }
00123
00124 virtual double adjustedScore(double origScore, const lemur::api::TextQueryRep * qRep,
00125 const lemur::api::DocumentRep * dRep) const {
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143 return origScore;
00144 }
00145
00146 private:
00147 const lemur::api::Index & ind;
00148 int first;
00149
00150 double rmax;
00151 double c05;
00152 double idiv;
00153 };
00154 }
00155 }
00156
00157
00158 #endif