Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

JelinekMercerTermScoreFunction.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 //
00014 // JelinekMercerTermScoreFunction
00015 //
00016 // 26 January 2004 - tds
00017 //
00018 
00019 #ifndef INDRI_JELINEKMERCERTERMSCOREFUNCTION_HPP
00020 #define INDRI_JELINEKMERCERTERMSCOREFUNCTION_HPP
00021 
00022 #include <math.h>
00023 namespace indri
00024 {
00026   namespace query
00027   {
00028     
00029     class JelinekMercerTermScoreFunction : public TermScoreFunction {
00030     private:
00031       double _lambda;
00032       double _backgroundLambda;
00033       double _collectionFrequency;
00034       double _collectionComponent;
00035       double _oneLevelCollectionComponent;
00036       double _contextLambda;
00037       double _collectionLambda;
00038       double _documentLambda;
00039       double _foregroundLambda;
00040 
00041     public:
00042       JelinekMercerTermScoreFunction( double collectionFrequency, double collectionLambda, double documentLambda = 0.0 ) {
00043         _contextLambda = (1 - collectionLambda - documentLambda);
00044         _collectionFrequency = collectionFrequency;
00045         _collectionLambda = collectionLambda;
00046         _documentLambda = documentLambda;
00047         _foregroundLambda = (1 - _collectionLambda);
00048 
00049         assert( _documentLambda >= 0.0 && _documentLambda <= 1.0 );
00050         assert( _collectionLambda >= 0.0 && _collectionLambda <= 1.0 );
00051         assert( _contextLambda >= 0.0 && _contextLambda <= 1.0 );
00052     
00053         _collectionComponent = _collectionLambda * _collectionFrequency;
00054       }
00055 
00056       double scoreOccurrence( double occurrences, int contextSize ) {
00057         //
00058         //             [                      occurrences                                             ]
00059         // score = log [ foregroundLambda * ---------------  + collectionLambda * collectionFrequency ]
00060         //             [                      contextSize                                             ]
00061         //
00062 
00063         double contextFrequency = contextSize ? occurrences / double(contextSize) : 0.0;
00064         return log( _foregroundLambda * contextFrequency + _collectionComponent );
00065       }
00066 
00067       double scoreOccurrence( double occurrences, int contextSize, double documentOccurrences, int documentLength ) {
00068         double contextFrequency = contextSize ? occurrences / double(contextSize) : 0.0;
00069         double documentFrequency = documentLength ? documentOccurrences / double(documentLength) : 0.0;
00070         return log( _contextLambda * contextFrequency + _documentLambda * documentFrequency + _collectionComponent );
00071       }
00072     };
00073   }
00074 }
00075 
00076 #endif // INDRI_JELINEKMERCERTERMSCOREFUNCTION_HPP
00077 

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4