Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

QueryNode.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002-2003 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 /*
00012   author: fff, dmf
00013   * 03/31/2003 -- Split QueryNode out of StructQueryRep, refactor some 
00014   * code out of rep into node classes (more OOP that way).
00015   */
00016 
00017 #ifndef _QUERYNODE_HPP
00018 #define _QUERYNODE_HPP
00019 #include "StructQryDocRep.hpp"
00020 #include "ProxInfo.hpp"
00021 namespace lemur 
00022 {
00023   namespace retrieval 
00024   {
00025     
00026     // forward declaration
00027     class QueryNode;
00028 
00030 
00033     class QnList {
00034     public:
00035       QnList() {}
00036       ~QnList(); 
00038       void startIteration() {i = 0;}
00040       bool hasMore() {  return (i < qnList.size()); }
00042       QueryNode * nextNode()  {QueryNode *qn = qnList[i++]; return qn; }
00044       const QueryNode * getNode(int j) const{QueryNode *qn = qnList[j]; return qn; }
00046       int size() const{return qnList.size();}
00048       void push_back(QueryNode *qn){qnList.push_back(qn);}
00050       QueryNode *popNode() { 
00051         QueryNode *qn = qnList[i]; 
00052         qnList[i++] = NULL;
00053         return qn;  
00054       }
00055     private:
00057       mutable int i;
00059       vector<QueryNode *> qnList;
00060     };
00061     //------------------------------------------------------------
00062     //      Abstract Interface for Query Nodes 
00063     //------------------------------------------------------------
00064 
00065 
00067 
00072     class QueryNode {
00073     public:
00075       QueryNode(int id, double weight) : 
00076         w(weight),it(id),ch(NULL),entries(0),nextDoc(0),dCnt(0),
00077         dList(NULL), proxList(NULL), dw(0) {}
00079       QueryNode() : w(1),it(0),ch(NULL),entries(0),nextDoc(0),dCnt(0),
00080                     dList(NULL), proxList(NULL), dw(0){}
00082       QueryNode(double weight, double dWeight = 0) : w(weight), it(0), ch(NULL),
00083                                                      entries(0), nextDoc(0),
00084                                                      dCnt(0),proxList(NULL),
00085                                                      dList(NULL), 
00086                                                      dw(dWeight) {}
00088       virtual ~QueryNode() {
00089         delete[](dList);
00090         delete(proxList);
00091         delete(ch);
00092       }
00094       const QnList *children() const { return ch;}
00096       void setChildren(QnList *cl) {ch = cl;}
00098       int id() const { return it;}
00100       double weight() const { return w; }
00101       // update weight
00102       void setWeight(double wt) { w = wt; }
00103       // update entries count
00104       void setEntries(int cnt) { entries = cnt; }
00105 
00107       virtual void copyDocList(int len, int tf, const lemur::api::DocInfoList *dl, int dc) {
00108       }
00110       virtual void updateDocList(int numDocs) = 0;
00112       virtual double eval(const lemur::api::DocumentRep *dRep) const = 0;
00113 
00115       bool isProxNode() const { return proxList != NULL; }
00117       void startProxIteration() const {if (proxList) proxList->startIteration();}
00119       bool hasMoreProx() const {return proxList && proxList->hasMore();}
00121       bool nextProxItem() const {return proxList && proxList->nextDoc();}
00124       bool nextProxItem(lemur::api::DOCID_T did) const{
00125         return proxList && proxList->nextDoc(did);
00126       }
00127 
00129       int dCnt;
00131       bool *dList;
00133       mutable lemur::api::DOCID_T nextDoc;
00135       ProxInfo * proxList;
00136 
00137     protected:  
00140       void transformPassageOps();
00142       QnList *ch;
00144       int entries;
00146       int it;
00148       double w; 
00150       double dw;
00151 
00153       void unionDocList(int numDocs);
00155       void intersectDocList(int numDocs);
00156     };
00157 
00159     class BeliefNode : public QueryNode {
00160     public:
00161       BeliefNode(double wt) : QueryNode(wt) { }
00162       BeliefNode(int id, double weight) : QueryNode(id, weight) { }  
00163       BeliefNode(double wt, double dbelief) : QueryNode(wt, dbelief) { }
00164       virtual ~BeliefNode() { }
00166       virtual void updateDocList(int numDocs) {
00167         unionDocList(numDocs);   
00168       }
00169     };
00170 
00171 
00173     class ProxNode : public QueryNode {
00174     public:
00175       ProxNode(double wt) : QueryNode(wt) {}
00176   
00177       ProxNode(int id, double weight) :  QueryNode(id, weight) { }  
00178       ProxNode(double w, double d) : QueryNode(w, d), winSize(0) {}
00179       ProxNode(int size, double w, double d) : QueryNode(w, d), winSize(size) {}
00180       virtual ~ProxNode() { }
00182       virtual double eval(const lemur::api::DocumentRep *dRep) const {
00183         return proximityScore(dRep, dw);
00184       }
00186       virtual void updateDocList(int numDocs) {
00187         intersectDocList(numDocs);   
00188       }
00189 
00190     protected:
00192       int winSize;
00193     private:
00197       double proximityScore(const lemur::api::DocumentRep *dR, double defaultScore) const {
00198         const StructQryDocRep *dRep = (const StructQryDocRep *)dR;
00199         int tf = 0;
00200         double score;
00201         if(dRep->did < nextDoc) {
00202           return defaultScore;
00203         }
00204         // Skip to next doc id that we can score (> or == to current).
00205         if (proxList->nextDoc(dRep->did)) {
00206           double idf = dRep->computeIdfScore(dCnt);
00207           // compute proximity score for whole doc
00208           tf = proxList->count();
00209           score = dRep->beliefScore(tf, idf);
00210           // advance the prox pointer.
00211           if (proxList->nextDoc()) {
00212             nextDoc = proxList->id();
00213           } else {
00214             // No more prox entries.
00215             nextDoc = INT_MAX;
00216           }
00217         } else {
00218           score = defaultScore;
00219         }
00220         return score;
00221       }
00222     };
00223 
00224 
00227     class SumQnode : public BeliefNode {
00228     public:
00229       SumQnode(double wt) : BeliefNode(wt){}
00230       virtual ~SumQnode() {}
00233       virtual double eval(const lemur::api::DocumentRep *dRep) const{
00234         double sum = 0;
00235         int count = 0;
00236         const QueryNode *qn;
00237         ch->startIteration();
00238         while(ch->hasMore()) {
00239           qn = ch->nextNode();
00240           sum += qn->eval(dRep);
00241           count++;
00242         }
00243         //    return sum/entries;
00244         return sum/count;
00245       }
00246     };
00247 
00250     class WsumQnode : public BeliefNode {
00251     public:
00252       WsumQnode(double w) : BeliefNode(w) {}
00253       virtual ~WsumQnode() {}
00256       virtual double eval(const lemur::api::DocumentRep *dRep) const{
00257         double sum = 0;
00258         double total_wt = 0;
00259         const QueryNode *qn;
00260         double wt;
00261         ch->startIteration();
00262         while(ch->hasMore()) {
00263           qn = ch->nextNode();
00264           wt = qn->weight(); 
00265           total_wt += wt;
00266           sum += wt * qn->eval(dRep);
00267         }
00268         if(total_wt > 0) // normalized by the sum of the weights
00269           sum /= total_wt;
00270         // the belief is scaled by the weight 'w' of itself.
00271         return sum;
00272       }
00273     };
00274 
00275 
00279     class AndQnode : public BeliefNode {
00280     public:
00281       AndQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00282       virtual ~AndQnode() {}
00286       virtual double eval(const lemur::api::DocumentRep *dRep) const{
00287         double prod = 1;
00288         const QueryNode *qn;
00289         double wt;
00290         ch->startIteration();
00291         while(ch->hasMore()) {
00292           qn = ch->nextNode();
00293           wt = qn->eval(dRep);
00294           if(wt > dw)
00295             prod *= wt;
00296           else
00297             prod *= dw;
00298         }
00299         return prod;
00300       }
00301     };
00302 
00307     class OrQnode : public BeliefNode {
00308     public:
00309       OrQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00310       virtual ~OrQnode() {}
00313       virtual double eval(const lemur::api::DocumentRep *dRep) const {
00314         double prod = 1.0;
00315         const QueryNode *qn;
00316         double wt;
00317         ch->startIteration();
00318         while(ch->hasMore()) {
00319           qn = ch->nextNode();
00320           wt = qn->eval(dRep);
00321           if(wt > dw)
00322             prod *= (1.0 - wt);
00323         }
00324         return (1.0 - prod);
00325       }
00326     };
00327 
00331     class NotQnode : public BeliefNode {
00332     public:
00333       NotQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00334       virtual ~NotQnode() {}
00336       virtual double eval(const lemur::api::DocumentRep *dRep) const{
00337         // inverting the belief in the only child node
00338         const QueryNode *qn;
00339         ch->startIteration();
00340         qn = ch->nextNode();
00341         return (1.0 - qn->eval(dRep));
00342       }
00343     };
00344 
00347     class MaxQnode : public BeliefNode {
00348     public:
00349       MaxQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00350       virtual ~MaxQnode() {}
00353       virtual double eval(const lemur::api::DocumentRep *dRep) const{
00354         double mx = dw;
00355         const QueryNode *qn;
00356         double wt;
00357         ch->startIteration();
00358         while(ch->hasMore()) {
00359           qn = ch->nextNode();
00360           wt = qn->eval(dRep);
00361           if(wt > mx) 
00362             mx = wt;
00363         }
00364         return mx;
00365       }
00366     };
00367 
00372     class BandQnode : public BeliefNode {
00373     public:
00374       BandQnode(double dbelief, double w) : BeliefNode(w, dbelief) {}
00375       virtual ~BandQnode() {}
00379       virtual double eval(const lemur::api::DocumentRep *dRep) const{
00380         double prod = 1.0;
00381         const QueryNode *qn;
00382         double wt;
00383         StructQryDocRep * myRep = (StructQryDocRep *)dRep;
00384         lemur::api::DOCID_T did = myRep->did;
00385         ch->startIteration();
00386         while(ch->hasMore()) {
00387           qn = ch->nextNode();
00388           // advance child prox entry to this document
00389           if (qn->hasMoreProx() && qn->nextProxItem(did))
00390             qn->nextDoc = did; // update nextDoc for term node eval
00391           wt = qn->eval(dRep);
00392           if(wt > dw) 
00393             prod *= wt;
00394           else 
00395             return 0;
00396         }
00397         return prod;
00398       }
00399       virtual void updateDocList(int numDocs) {
00400         intersectDocList(numDocs); // different from superclass.
00401       }
00402     };
00403 
00409     class BandNotQnode : public BeliefNode {
00410     public:
00411       BandNotQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00412       virtual ~BandNotQnode() {}
00416       virtual double eval(const lemur::api::DocumentRep *dRep) const{
00417         double child1Wt;
00418         double child2Wt;
00419         // boolean_and_not consider only two children
00420         const QueryNode *qn;
00421         ch->startIteration();
00422         qn = ch->nextNode();
00423         child1Wt = qn->eval(dRep);
00424         qn = ch->nextNode();
00425         child2Wt = qn->eval(dRep);
00426         if(child2Wt > dw)
00427           return 0;
00428         else
00429           return (child1Wt * (1.0 - child2Wt));
00430       }
00431     };
00432 
00433 
00438     class FiltRejQnode : public BeliefNode {
00439     public:
00440       FiltRejQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00441       virtual ~FiltRejQnode() {}
00446       virtual double eval(const lemur::api::DocumentRep *dRep) const{
00447         double child1Wt;
00448         double child2Wt;
00449         // filter_reject consider only two children
00450         const QueryNode *qn;
00451         ch->startIteration();
00452         qn = ch->nextNode();
00453         child1Wt = qn->eval(dRep);
00454         qn = ch->nextNode();
00455         child2Wt = qn->eval(dRep);
00456         if(child1Wt > dw && child2Wt <= dw)
00457           return child1Wt;
00458         else
00459           return dw;
00460       }
00461     };
00462 
00467     class FiltReqQnode : public BeliefNode {
00468     public:
00469       FiltReqQnode(double dbelief, double wt) : BeliefNode(wt, dbelief) {}
00470       virtual ~FiltReqQnode() {}
00474       virtual double eval(const lemur::api::DocumentRep *dRep) const{
00475         double child1Wt;
00476         double child2Wt;
00477         // filter_require consider only two children
00478         const QueryNode *qn;
00479         ch->startIteration();
00480         qn = ch->nextNode();
00481         child1Wt = qn->eval(dRep);
00482         qn = ch->nextNode();
00483         child2Wt = qn->eval(dRep);
00484         if(child1Wt > dw && child2Wt > dw)
00485           return child1Wt;
00486         else
00487           return dw;
00488       }
00489       virtual void updateDocList(int numDocs) {
00490         intersectDocList(numDocs); // different from superclass.
00491       }
00492     };
00493 
00495     class TermQnode : public ProxNode {
00496     public:
00497       TermQnode(int id, double weight) : ProxNode(id, weight) { }
00498       virtual ~TermQnode() {}
00500       virtual double eval(const lemur::api::DocumentRep *dRep) const{ 
00501         StructQryDocRep * myRep = (StructQryDocRep *)dRep;
00502         lemur::api::DOCID_T did = myRep->did;
00503         double freq = 0.0;
00504         if (nextDoc == did) {
00505           freq = (double)proxList->count();
00506           if (hasMoreProx()) {
00507             // advance the prox pointer.
00508             nextProxItem();
00509             nextDoc = proxList->id();
00510           } else {
00511             // No more prox entries.
00512             nextDoc = INT_MAX;
00513           }
00514         }
00515         return(myRep->termWeight(it, freq, dCnt));
00516       }
00518       virtual void copyDocList(int len, int tf, const lemur::api::DocInfoList *dl, int dc);
00519     };
00520 
00523     class OdnQNode : public ProxNode {
00524     public:
00525       OdnQNode(int size, double w, double d) : ProxNode(size, w, d){}
00526       virtual ~OdnQNode() {}
00527       virtual void updateDocList(int numDocs) {
00528         ProxNode::updateDocList(numDocs);   // use superclass method
00529         orderedProxList(numDocs);
00530       }
00531   
00532     private:
00534       void orderedProxList(int numDocs);
00536       bool foundOrderedProx(int bpos, int wsize, const QnList *cl, int ith);
00537     };
00538 
00541     class UwnQNode : public ProxNode {
00542     public:
00543       UwnQNode(int size, double w, double d) : ProxNode(size, w, d) {}
00544       virtual ~UwnQNode() {}
00545       virtual void updateDocList(int numDocs) {
00546         ProxNode::updateDocList(numDocs);   // use superclass method
00547         unorderedProxList(numDocs);
00548       }
00549     private:
00551       void unorderedProxList(int numDocs);
00553       bool findUnorderedWin(const QueryNode *cqn, QnList *cl, int winSize);
00554     };
00555 
00556 
00558     // fix to be WPARSUMN 
00562     class PassageQNode : public ProxNode {
00563     public:
00564       PassageQNode(int size, double w) : ProxNode(w){ winSize = size; }
00565       virtual ~PassageQNode() {}
00566 
00571       virtual double eval(const lemur::api::DocumentRep *dR) const{
00572         StructQryDocRep *dRep = (StructQryDocRep *)dR;
00573         double maxScore = 0;
00574         double score;      
00575         dRep->startPassageIteration(winSize);
00576         while(dRep->hasMorePassage()) {
00577           score = passageScore(dRep);
00578           if(score > maxScore) {
00579             maxScore = score;
00580           }
00581           dRep->nextPassage();
00582         }
00583         return maxScore;
00584       }
00585 
00586       virtual void updateDocList(int numDocs) {
00587         unionDocList(numDocs); // different from superclass.
00588         transformPassageOps();
00589       }
00590     private:
00593       double passageScore(const StructQryDocRep *dRep) const;
00594     };
00595 
00598     class SynQNode : public ProxNode {
00599     public:
00600       SynQNode(double w, double d) : ProxNode(w, d) {}
00601       virtual ~SynQNode() {}
00602       virtual void updateDocList(int numDocs) {
00603         unionDocList(numDocs); // different from superclass.
00604         synonymProxList();
00605       }
00606   
00607     private:
00609       void synonymProxList();
00610     };
00611 
00615     class PropQNode : public OdnQNode {
00616     public:
00617       PropQNode(double w, double d) : OdnQNode(0, w, d){}
00618       virtual ~PropQNode() {}
00619     };
00620   }
00621 }
00622 
00623 #endif

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4