Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

JelinekMercerTermScoreFunction.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 //
00014 // JelinekMercerTermScoreFunction
00015 //
00016 // 26 January 2004 - tds
00017 //
00018 
00019 #ifndef INDRI_JELINEKMERCERTERMSCOREFUNCTION_HPP
00020 #define INDRI_JELINEKMERCERTERMSCOREFUNCTION_HPP
00021 
00022 #include <math.h>
00023 
00024 class JelinekMercerTermScoreFunction : public TermScoreFunction {
00025 private:
00026   double _lambda;
00027   double _backgroundLambda;
00028   double _collectionFrequency;
00029   double _collectionComponent;
00030   double _oneLevelCollectionComponent;
00031   double _contextLambda;
00032   double _collectionLambda;
00033   double _documentLambda;
00034   double _foregroundLambda;
00035 
00036 public:
00037   JelinekMercerTermScoreFunction( double collectionFrequency, double collectionLambda, double documentLambda = 0.0 ) {
00038     _contextLambda = (1 - collectionLambda - documentLambda);
00039     _collectionFrequency = collectionFrequency;
00040     _collectionLambda = collectionLambda;
00041     _documentLambda = documentLambda;
00042     _foregroundLambda = _collectionLambda + _documentLambda;
00043 
00044     assert( _documentLambda >= 0.0 && _documentLambda <= 1.0 );
00045     assert( _collectionLambda >= 0.0 && _collectionLambda <= 1.0 );
00046     assert( _contextLambda >= 0.0 && _contextLambda <= 1.0 );
00047     
00048     _collectionComponent = _collectionLambda * _collectionFrequency;
00049   }
00050 
00051   double scoreOccurrence( int occurrences, int contextSize ) {
00052     //
00053     //             [                      occurrences                                             ]
00054     // score = log [ foregroundLambda * ---------------  + collectionLambda * collectionFrequency ]
00055     //             [                      contextSize                                             ]
00056     //
00057 
00058     double contextFrequency = contextSize ? double(occurrences) / double(contextSize) : 0.0;
00059     return log( _foregroundLambda * contextFrequency + _collectionComponent );
00060   }
00061 
00062   double scoreOccurrence( int occurrences, int contextSize, int documentOccurrences, int documentLength ) {
00063     double contextFrequency = contextSize ? double(occurrences) / double(contextSize) : 0.0;
00064     double documentFrequency = documentLength ? double(documentOccurrences) / double(documentLength) : 0.0;
00065     return log( _contextLambda * contextFrequency + _documentLambda * documentFrequency + _collectionComponent );
00066   }
00067 };
00068 
00069 #endif // INDRI_JELINEKMERCERTERMSCOREFUNCTION_HPP
00070 

Generated on Wed Nov 3 12:58:59 2004 for Lemur Toolkit by doxygen 1.2.18