00001 /*========================================================================== 00002 * Copyright (c) 2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // 00014 // JelinekMercerTermScoreFunction 00015 // 00016 // 26 January 2004 - tds 00017 // 00018 00019 #ifndef INDRI_JELINEKMERCERTERMSCOREFUNCTION_HPP 00020 #define INDRI_JELINEKMERCERTERMSCOREFUNCTION_HPP 00021 00022 #include <math.h> 00023 namespace indri 00024 { 00026 namespace query 00027 { 00028 00029 class JelinekMercerTermScoreFunction : public TermScoreFunction { 00030 private: 00031 double _lambda; 00032 double _backgroundLambda; 00033 double _collectionFrequency; 00034 double _collectionComponent; 00035 double _oneLevelCollectionComponent; 00036 double _contextLambda; 00037 double _collectionLambda; 00038 double _documentLambda; 00039 double _foregroundLambda; 00040 00041 public: 00042 JelinekMercerTermScoreFunction( double collectionFrequency, double collectionLambda, double documentLambda = 0.0 ) { 00043 _contextLambda = (1 - collectionLambda - documentLambda); 00044 _collectionFrequency = collectionFrequency; 00045 _collectionLambda = collectionLambda; 00046 _documentLambda = documentLambda; 00047 _foregroundLambda = (1 - _collectionLambda); 00048 00049 assert( _documentLambda >= 0.0 && _documentLambda <= 1.0 ); 00050 assert( _collectionLambda >= 0.0 && _collectionLambda <= 1.0 ); 00051 assert( _contextLambda >= 0.0 && _contextLambda <= 1.0 ); 00052 00053 _collectionComponent = _collectionLambda * _collectionFrequency; 00054 } 00055 00056 double scoreOccurrence( double occurrences, int contextSize ) { 00057 // 00058 // [ occurrences ] 00059 // score = log [ foregroundLambda * --------------- + collectionLambda * collectionFrequency ] 00060 // [ contextSize ] 00061 // 00062 00063 double contextFrequency = contextSize ? occurrences / double(contextSize) : 0.0; 00064 return log( _foregroundLambda * contextFrequency + _collectionComponent ); 00065 } 00066 00067 double scoreOccurrence( double occurrences, int contextSize, double documentOccurrences, int documentLength ) { 00068 double contextFrequency = contextSize ? occurrences / double(contextSize) : 0.0; 00069 double documentFrequency = documentLength ? documentOccurrences / double(documentLength) : 0.0; 00070 return log( _contextLambda * contextFrequency + _documentLambda * documentFrequency + _collectionComponent ); 00071 } 00072 }; 00073 } 00074 } 00075 00076 #endif // INDRI_JELINEKMERCERTERMSCOREFUNCTION_HPP 00077