Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

SmoothingAnnotatorWalker.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 //
00014 // SmoothingAnnotatorWalker
00015 //
00016 // 27 April 2004 -- tds
00017 //
00018 
00019 #ifndef INDRI_SMOOTHINGANNOTATORWALKER_HPP
00020 #define INDRI_SMOOTHINGANNOTATORWALKER_HPP
00021 
00022 #include "indri/Parameters.hpp"
00023 
00024 class SmoothingAnnotatorWalker : public indri::lang::Walker {
00025 private:
00026   struct rule_type {
00027     std::string field;
00028     std::string op;
00029     std::string smoothing;
00030   };
00031 
00032   std::vector<rule_type*> _rules;
00033   std::string _defaultSmoothing;
00034 
00035   void _loadSmoothingRules( Parameters& parameters ) {
00036     if( !parameters.exists("rule") )
00037       return;
00038 
00039     Parameters rules = parameters["rule"];
00040 
00041     for(size_t i=0; i<rules.size(); i++) {
00042       std::string ruleText = rules[i];
00043 
00044       int nextComma = 0;
00045       int nextColon = 0;
00046       int location = 0;
00047 
00048       rule_type* rule = new rule_type;
00049       rule->op = "*";
00050       rule->field = "*";
00051 
00052       for( location = 0; location < ruleText.length(); ) {
00053         nextComma = ruleText.find( ',', location );
00054         nextColon = ruleText.find( ':', location );
00055 
00056         std::string key = ruleText.substr( location, nextColon-location );
00057         std::string value = ruleText.substr( nextColon+1, nextComma-nextColon-1 );
00058 
00059         if( key == "field" ) {
00060           rule->field = value;
00061         } else if( key == "operator" ) {
00062           rule->op = value;
00063         } else {
00064           if( rule->smoothing.size() ) rule->smoothing += ",";
00065           rule->smoothing += key + ":" + value;
00066         }
00067 
00068         if( nextComma > 0 )
00069           location = nextComma+1;
00070         else
00071           location = ruleText.size();
00072       }
00073 
00074       _rules.push_back(rule);
00075     }
00076   }
00077 
00078   const std::string& _matchSmoothingRule( const std::string& field, const std::string& op ) {
00079     for( int i=signed(_rules.size())-1; i >= 0; i-- ) {
00080       const rule_type& rule = *_rules[i];
00081 
00082       if( ( rule.field == field || rule.field == "*" ) &&
00083         ( rule.op == op || rule.op == "*" ) ) {
00084         return rule.smoothing;
00085       }
00086     }
00087 
00088     return _defaultSmoothing;
00089   }
00090 
00091 public:
00092   SmoothingAnnotatorWalker( Parameters& parameters ) {
00093     _loadSmoothingRules( parameters );
00094     _defaultSmoothing = "method:dirichlet,mu:2500";
00095   }
00096 
00097   ~SmoothingAnnotatorWalker( ) {
00098     delete_vector_contents<rule_type*>( _rules );
00099   }
00100 
00101   void after( indri::lang::RawScorerNode* scorer ) {
00102     indri::lang::Node* context = scorer->getContext();
00103     indri::lang::Field* contextField = dynamic_cast<indri::lang::Field*>(context);
00104     indri::lang::ExtentAnd* contextExtAnd = dynamic_cast<indri::lang::ExtentAnd*>(context);
00105     std::string fieldName;
00106 
00107     // there may be an ExtentAnd around the field, so descend into it if necessary
00108     if( contextExtAnd && contextExtAnd->getChildren().size() == 1 ) {
00109       contextField = dynamic_cast<indri::lang::Field*>(contextExtAnd->getChildren()[0]);
00110     }
00111 
00112     // if there's a field here, record its name
00113     if( contextField ) {
00114       fieldName = contextField->getFieldName();
00115     } else {
00116       fieldName = "?";
00117     }
00118     
00119     indri::lang::Node* raw = scorer->getRawExtent();
00120     indri::lang::Node* rawTerm = dynamic_cast<indri::lang::IndexTerm*>(raw);
00121     indri::lang::Node* rawODNode = dynamic_cast<indri::lang::ODNode*>(raw);
00122     indri::lang::Node* rawUWNode = dynamic_cast<indri::lang::UWNode*>(raw);
00123     std::string op;
00124 
00125     if( rawODNode || rawUWNode ) {
00126       op = "window";
00127     } else if( rawTerm ) {
00128       op = "term";
00129     } else {
00130       op = "?";
00131     }
00132 
00133     scorer->setSmoothing( _matchSmoothingRule( fieldName, op ) );
00134   }
00135 };
00136 
00137 #endif // INDRI_SMOOTHINGANNOTATORWALKER_HPP
00138 

Generated on Wed Nov 3 12:59:04 2004 for Lemur Toolkit by doxygen 1.2.18