00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef INDRI_SMOOTHINGANNOTATORWALKER_HPP
00020 #define INDRI_SMOOTHINGANNOTATORWALKER_HPP
00021
00022 #include "indri/Parameters.hpp"
00023 namespace indri
00024 {
00025 namespace lang
00026 {
00027
00028 class SmoothingAnnotatorWalker : public indri::lang::Walker {
00029 private:
00030 struct rule_type {
00031 std::string node;
00032 std::string field;
00033 std::string op;
00034 std::string smoothing;
00035 };
00036
00037 std::vector<rule_type*> _rules;
00038 std::string _defaultSmoothing;
00039
00040 void _loadSmoothingRules( indri::api::Parameters& parameters ) {
00041 if( !parameters.exists("rule") )
00042 return;
00043
00044 indri::api::Parameters rules = parameters["rule"];
00045
00046 for(size_t i=0; i<rules.size(); i++) {
00047 std::string ruleText = rules[i];
00048
00049 int nextComma = 0;
00050 int nextColon = 0;
00051 int location = 0;
00052
00053 rule_type* rule = new rule_type;
00054 rule->node = "RawScorerNode";
00055 rule->op = "*";
00056 rule->field = "*";
00057
00058 for( location = 0; location < ruleText.length(); ) {
00059 nextComma = ruleText.find( ',', location );
00060 nextColon = ruleText.find( ':', location );
00061
00062 std::string key = ruleText.substr( location, nextColon-location );
00063 std::string value = ruleText.substr( nextColon+1, nextComma-nextColon-1 );
00064
00065 if( key == "node" ) {
00066 rule->node = value;
00067 } else if( key == "field" ) {
00068 rule->field = value;
00069 } else if( key == "operator" ) {
00070 rule->op = value;
00071 } else {
00072 if( rule->smoothing.size() ) rule->smoothing += ",";
00073 rule->smoothing += key + ":" + value;
00074 }
00075
00076 if( nextComma > 0 )
00077 location = nextComma+1;
00078 else
00079 location = ruleText.size();
00080 }
00081
00082 _rules.push_back(rule);
00083 }
00084 }
00085
00086 const std::string& _matchSmoothingRule( const std::string& node, const std::string& field, const std::string& op ) {
00087 for( int i=signed(_rules.size())-1; i >= 0; i-- ) {
00088 const rule_type& rule = *_rules[i];
00089
00090 if( ( rule.node == node ) &&
00091 ( rule.field == field || rule.field == "*" ) &&
00092 ( rule.op == op || rule.op == "*" ) ) {
00093 return rule.smoothing;
00094 }
00095 }
00096
00097 return _defaultSmoothing;
00098 }
00099
00100 public:
00101 SmoothingAnnotatorWalker( indri::api::Parameters& parameters ) {
00102 _loadSmoothingRules( parameters );
00103 _defaultSmoothing = "method:dirichlet,mu:2500";
00104 }
00105
00106 ~SmoothingAnnotatorWalker( ) {
00107 indri::utility::delete_vector_contents<rule_type*>( _rules );
00108 }
00109
00110 void after( indri::lang::RawScorerNode* scorer ) {
00111 indri::lang::Node* context = scorer->getContext();
00112 indri::lang::Field* contextField = dynamic_cast<indri::lang::Field*>(context);
00113 indri::lang::ExtentOr* contextExtOr = dynamic_cast<indri::lang::ExtentOr*>(context);
00114 std::string fieldName;
00115
00116
00117 if( contextExtOr && contextExtOr->getChildren().size() == 1 ) {
00118 contextField = dynamic_cast<indri::lang::Field*>(contextExtOr->getChildren()[0]);
00119 }
00120
00121
00122 if( contextField ) {
00123 fieldName = contextField->getFieldName();
00124 } else {
00125 fieldName = "?";
00126 }
00127
00128 indri::lang::Node* raw = scorer->getRawExtent();
00129 indri::lang::Node* rawTerm = dynamic_cast<indri::lang::IndexTerm*>(raw);
00130 indri::lang::Node* rawODNode = dynamic_cast<indri::lang::ODNode*>(raw);
00131 indri::lang::Node* rawUWNode = dynamic_cast<indri::lang::UWNode*>(raw);
00132 indri::lang::Node* rawWeightedExtentOr = dynamic_cast<indri::lang::WeightedExtentOr*>(raw);
00133
00134 std::string op;
00135
00136 if( rawODNode || rawUWNode ) {
00137 op = "window";
00138 } else if( rawTerm || rawWeightedExtentOr ) {
00139 op = "term";
00140 } else {
00141 op = "?";
00142 }
00143
00144 scorer->setSmoothing( _matchSmoothingRule( "RawScorerNode", fieldName, op ) );
00145 }
00146
00147 void after( indri::lang::NestedRawScorerNode* scorer ) {
00148 after( (indri::lang::RawScorerNode *) scorer );
00149 }
00150
00151 void after( indri::lang::ShrinkageScorerNode* scorer ) {
00152 after( (indri::lang::RawScorerNode *) scorer );
00153
00154 for( int i=signed(_rules.size())-1; i >= 0; i-- ) {
00155 const rule_type& rule = *_rules[i];
00156
00157 if( rule.node == "ShrinkageBelief" &&
00158 rule.op == "*" ) {
00159 if ( rule.field == "*" ) {
00160 scorer->addShrinkageRule( rule.smoothing );
00161 } else {
00162 std::string ruleString = "field:" + rule.field + "," + rule.smoothing;
00163 scorer->addShrinkageRule( ruleString );
00164 }
00165 }
00166 }
00167 }
00168
00169 void after( indri::lang::LengthPrior* prior ) {
00170 std::string ruleText = _matchSmoothingRule( "LengthPrior", "*", "*" );
00171 double exponent = 0;
00172
00173 int nextComma = 0;
00174 int nextColon = 0;
00175 int location = 0;
00176
00177 for( location = 0; location < ruleText.length(); ) {
00178 nextComma = ruleText.find( ',', location );
00179 nextColon = ruleText.find( ':', location );
00180
00181 std::string key = ruleText.substr( location, nextColon-location );
00182 std::string value = ruleText.substr( nextColon+1, nextComma-nextColon-1 );
00183
00184 if( key == "exponent" ) {
00185 exponent = atof( value.c_str() );
00186 }
00187 if( nextComma > 0 )
00188 location = nextComma+1;
00189 else
00190 location = ruleText.size();
00191 }
00192
00193 prior->setExponent( exponent );
00194 }
00195 };
00196 }
00197 }
00198
00199 #endif // INDRI_SMOOTHINGANNOTATORWALKER_HPP
00200