Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

TwoStageTermScoreFunction.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 //
00014 // TwoStageTermScoreFunction.hpp
00015 //
00016 // 16 April 2004 -- tds
00017 //
00018 
00019 #ifndef INDRI_TWOSTAGETERMSCOREFUNCTION_HPP
00020 #define INDRI_TWOSTAGETERMSCOREFUNCTION_HPP
00021 namespace indri
00022 {
00023   namespace query
00024   {
00025     
00026     // Term scorer that mixes a Dirichlet-smoothed estimate with the collection frequency and returns the log of the mixture.
00027     // Inputs are not validated: nothing here guards against a zero divisor or the log of a non-positive value.
00028     class TwoStageTermScoreFunction : public TermScoreFunction {
00029     private:
00030       double _mu;                  // \mu in the formula below
00031       double _lambda;              // \lambda in the formula below
00032       double _collectionFrequency; // p(w|C) in the formula below
00033 
00034     public:
00035       TwoStageTermScoreFunction( double mu, double lambda, double collectionFrequency ) :
00036         _mu(mu), _lambda(lambda), _collectionFrequency(collectionFrequency) {}
00037 
00038       // Returns log( (1 - \lambda) * [ (c(w;d) + \mu * p(w|C)) / (|d| + \mu) ] + \lambda * p(w|C) ),
00039       // with c(w;d) = occurrences and |d| = contextSize.
00040       double scoreOccurrence( double occurrences, int contextSize ) {
00041         const double numerator = occurrences + _mu*_collectionFrequency;
00042         const double denominator = contextSize + _mu;
00043         const double smoothed = numerator / denominator;
00044         return log( ( 1-_lambda ) * smoothed + _lambda * _collectionFrequency );
00045       }
00046 
00047       // Same mixture, except the value scaled by \mu is documentOccurrences / documentLength
00048       // rather than the collection frequency; the \lambda term still uses the collection frequency.
00049       double scoreOccurrence( double occurrences, int contextSize, double documentOccurrences, int documentLength ) {
00050         const double prior = documentOccurrences / double(documentLength);
00051         const double numerator = occurrences + _mu*prior;
00052         const double denominator = contextSize + _mu;
00053         const double smoothed = numerator / denominator;
00054         return log( ( 1-_lambda ) * smoothed + _lambda * _collectionFrequency );
00055       }
00056     };
00057   }
00058 }
00059 
00060 #endif // INDRI_TWOSTAGETERMSCOREFUNCTION_HPP
00061 

Generated on Tue Jun 15 11:02:56 2010 for Lemur by doxygen 1.3.4