00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef INDRI_SMOOTHINGANNOTATORWALKER_HPP
00020 #define INDRI_SMOOTHINGANNOTATORWALKER_HPP
00021
00022 #include "indri/Parameters.hpp"
00023
00024 class SmoothingAnnotatorWalker : public indri::lang::Walker {
00025 private:
00026 struct rule_type {
00027 std::string field;
00028 std::string op;
00029 std::string smoothing;
00030 };
00031
00032 std::vector<rule_type*> _rules;
00033 std::string _defaultSmoothing;
00034
00035 void _loadSmoothingRules( Parameters& parameters ) {
00036 if( !parameters.exists("rule") )
00037 return;
00038
00039 Parameters rules = parameters["rule"];
00040
00041 for(size_t i=0; i<rules.size(); i++) {
00042 std::string ruleText = rules[i];
00043
00044 int nextComma = 0;
00045 int nextColon = 0;
00046 int location = 0;
00047
00048 rule_type* rule = new rule_type;
00049 rule->op = "*";
00050 rule->field = "*";
00051
00052 for( location = 0; location < ruleText.length(); ) {
00053 nextComma = ruleText.find( ',', location );
00054 nextColon = ruleText.find( ':', location );
00055
00056 std::string key = ruleText.substr( location, nextColon-location );
00057 std::string value = ruleText.substr( nextColon+1, nextComma-nextColon-1 );
00058
00059 if( key == "field" ) {
00060 rule->field = value;
00061 } else if( key == "operator" ) {
00062 rule->op = value;
00063 } else {
00064 if( rule->smoothing.size() ) rule->smoothing += ",";
00065 rule->smoothing += key + ":" + value;
00066 }
00067
00068 if( nextComma > 0 )
00069 location = nextComma+1;
00070 else
00071 location = ruleText.size();
00072 }
00073
00074 _rules.push_back(rule);
00075 }
00076 }
00077
00078 const std::string& _matchSmoothingRule( const std::string& field, const std::string& op ) {
00079 for( int i=signed(_rules.size())-1; i >= 0; i-- ) {
00080 const rule_type& rule = *_rules[i];
00081
00082 if( ( rule.field == field || rule.field == "*" ) &&
00083 ( rule.op == op || rule.op == "*" ) ) {
00084 return rule.smoothing;
00085 }
00086 }
00087
00088 return _defaultSmoothing;
00089 }
00090
00091 public:
00092 SmoothingAnnotatorWalker( Parameters& parameters ) {
00093 _loadSmoothingRules( parameters );
00094 _defaultSmoothing = "method:dirichlet,mu:2500";
00095 }
00096
00097 ~SmoothingAnnotatorWalker( ) {
00098 delete_vector_contents<rule_type*>( _rules );
00099 }
00100
00101 void after( indri::lang::RawScorerNode* scorer ) {
00102 indri::lang::Node* context = scorer->getContext();
00103 indri::lang::Field* contextField = dynamic_cast<indri::lang::Field*>(context);
00104 indri::lang::ExtentAnd* contextExtAnd = dynamic_cast<indri::lang::ExtentAnd*>(context);
00105 std::string fieldName;
00106
00107
00108 if( contextExtAnd && contextExtAnd->getChildren().size() == 1 ) {
00109 contextField = dynamic_cast<indri::lang::Field*>(contextExtAnd->getChildren()[0]);
00110 }
00111
00112
00113 if( contextField ) {
00114 fieldName = contextField->getFieldName();
00115 } else {
00116 fieldName = "?";
00117 }
00118
00119 indri::lang::Node* raw = scorer->getRawExtent();
00120 indri::lang::Node* rawTerm = dynamic_cast<indri::lang::IndexTerm*>(raw);
00121 indri::lang::Node* rawODNode = dynamic_cast<indri::lang::ODNode*>(raw);
00122 indri::lang::Node* rawUWNode = dynamic_cast<indri::lang::UWNode*>(raw);
00123 std::string op;
00124
00125 if( rawODNode || rawUWNode ) {
00126 op = "window";
00127 } else if( rawTerm ) {
00128 op = "term";
00129 } else {
00130 op = "?";
00131 }
00132
00133 scorer->setSmoothing( _matchSmoothingRule( fieldName, op ) );
00134 }
00135 };
00136
00137 #endif // INDRI_SMOOTHINGANNOTATORWALKER_HPP
00138