00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef INDRI_JELINEKMERCERTERMSCOREFUNCTION_HPP
00020 #define INDRI_JELINEKMERCERTERMSCOREFUNCTION_HPP
00021
00022 #include <math.h>
00023
00024 class JelinekMercerTermScoreFunction : public TermScoreFunction {
00025 private:
00026 double _lambda;
00027 double _backgroundLambda;
00028 double _collectionFrequency;
00029 double _collectionComponent;
00030 double _oneLevelCollectionComponent;
00031 double _contextLambda;
00032 double _collectionLambda;
00033 double _documentLambda;
00034 double _foregroundLambda;
00035
00036 public:
00037 JelinekMercerTermScoreFunction( double collectionFrequency, double collectionLambda, double documentLambda = 0.0 ) {
00038 _contextLambda = (1 - collectionLambda - documentLambda);
00039 _collectionFrequency = collectionFrequency;
00040 _collectionLambda = collectionLambda;
00041 _documentLambda = documentLambda;
00042 _foregroundLambda = _collectionLambda + _documentLambda;
00043
00044 assert( _documentLambda >= 0.0 && _documentLambda <= 1.0 );
00045 assert( _collectionLambda >= 0.0 && _collectionLambda <= 1.0 );
00046 assert( _contextLambda >= 0.0 && _contextLambda <= 1.0 );
00047
00048 _collectionComponent = _collectionLambda * _collectionFrequency;
00049 }
00050
00051 double scoreOccurrence( int occurrences, int contextSize ) {
00052
00053
00054
00055
00056
00057
00058 double contextFrequency = contextSize ? double(occurrences) / double(contextSize) : 0.0;
00059 return log( _foregroundLambda * contextFrequency + _collectionComponent );
00060 }
00061
00062 double scoreOccurrence( int occurrences, int contextSize, int documentOccurrences, int documentLength ) {
00063 double contextFrequency = contextSize ? double(occurrences) / double(contextSize) : 0.0;
00064 double documentFrequency = documentLength ? double(documentOccurrences) / double(documentLength) : 0.0;
00065 return log( _contextLambda * contextFrequency + _documentLambda * documentFrequency + _collectionComponent );
00066 }
00067 };
00068
00069 #endif // INDRI_JELINEKMERCERTERMSCOREFUNCTION_HPP
00070