00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _STRUCTQRYDOCREP_HPP
00013 #define _STRUCTQRYDOCREP_HPP
00014 #include "DocumentRep.hpp"
00015 #include <cmath>
00016
00017 namespace lemur
00018 {
00019 namespace retrieval
00020 {
00021
00024 class StructQryDocRep : public lemur::api::DocumentRep {
00025 public:
00027 StructQryDocRep(lemur::api::DOCID_T docID, double *idfValue, int docLength, int docCount,
00028 double docLengthAverage, double db) :
00029 lemur::api::DocumentRep(docID), did(docID), idf(idfValue), end(docLength),
00030 docEnd(docLength), size(docLength), start(0),
00031 dla(docLengthAverage), defaultBelief(db) {
00032 oneMinusDB = 1 - defaultBelief;
00033 denom = log(docCount + 1.0);
00034 numer = docCount + 0.5;
00035 }
00037 virtual ~StructQryDocRep() {}
00039 virtual double termWeight(lemur::api::TERMID_T termID, const lemur::api::DocInfo *info) const { return 0;}
00041 virtual double termWeight(lemur::api::TERMID_T termID, double dtf, int df) const{
00042 if (idf)
00043 return beliefScore(dtf, idf[termID]);
00044 else
00045 return beliefScore(dtf, computeIdfScore(df));
00046 }
00048 virtual double scoreConstant() const { return 0;}
00049
00051 void startPassageIteration(int sz) const {
00052 size = sz;
00053 increment = size/2;
00054 start = 0;
00055 end = size < docEnd ? size : docEnd;
00056 }
00058 bool hasMorePassage() const {
00059
00060 return(start < docEnd);
00061 }
00062
00064 void nextPassage() const{
00065 if(start + increment < docEnd)
00066 start += increment;
00067 else
00068 start = docEnd;
00069 end = (start + size) < docEnd ? (start + size) : docEnd;
00070 }
00071
00074 double computeIdfScore(double df) const {
00075 return log(numer/df)/denom;
00076 }
00077
00079 double beliefScore(double df, double idf) const {
00080 return (defaultBelief + oneMinusDB
00081 * (df / (df + 0.5 + 1.5* ((end - start)/dla))) * idf);
00082 }
00083
00085 lemur::api::DOCID_T did;
00087 mutable int start;
00089 mutable int end;
00090
00091 private:
00093 double *idf;
00095 mutable int size;
00097 mutable int increment;
00099 int docEnd;
00101 double dla;
00103 double numer, denom;
00105 double defaultBelief, oneMinusDB;
00106 };
00107 }
00108 }
00109
00110 #endif