00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _RETRIEVALPARAMETER_HPP
00013 #define _RETRIEVALPARAMETER_HPP
00014
00016 #include "Param.hpp"
00017
00018 #include "TFIDFRetMethod.hpp"
00019 #include "OkapiRetMethod.hpp"
00020 #include "SimpleKLRetMethod.hpp"
00021 #include "CORIRetMethod.hpp"
00022 #include "CosSimRetMethod.hpp"
00023 #include "InQueryRetMethod.hpp"
00024 #include "IndriRetMethod.hpp"
00025
00027 namespace RetrievalParameter {
00028
00030
00031
00032 static lemur::utility::String databaseIndex;
00034 static lemur::utility::String retModel;
00036 static lemur::utility::String textQuerySet;
00038 static lemur::utility::String resultFile;
00040 static bool TRECresultFileFormat;
00042 static int fbDocCount;
00044 static int resultCount;
00046 static bool cacheDocReps;
00048 static bool useWorkingSet;
00050 static lemur::utility::String workSetFile;
00051
00053
00054 static string getLower(const char *parm, const char *def) {
00055 std::string tmpString = lemur::api::ParamGetString(parm, def);
00056
00057 for (int i = 0; i < tmpString.length(); i++)
00058 tmpString[i] = tolower(tmpString[i]);
00059 return tmpString;
00060 }
00061
00062 static void get() {
00063 databaseIndex = lemur::api::ParamGetString("index","");
00064 retModel = getLower("retModel","kl");
00065
00066 if (retModel == "0") retModel = "tfidf";
00067 if (retModel == "1") retModel = "okapi";
00068 if (retModel == "2") retModel = "kl";
00069 if (retModel == "3") retModel = "inquery";
00070 if (retModel == "4") retModel = "cori_cs";
00071 if (retModel == "5") retModel = "cos";
00072 if (retModel == "6") retModel = "inq_struct";
00073 if (retModel == "7") retModel = "indri";
00074
00075 string tmp = getLower("cacheDocReps", "true");
00076 cacheDocReps = (tmp == "true" || tmp == "1");
00077
00078 tmp = getLower("useWorkingSet", "false");
00079 useWorkingSet = (tmp == "true" || tmp == "1");
00080
00081 workSetFile = lemur::api::ParamGetString("workingSetFile","");
00082
00083 textQuerySet = lemur::api::ParamGetString("textQuery","");
00084 resultFile = lemur::api::ParamGetString("resultFile","");
00085
00086 tmp = getLower("resultFormat","trec");
00087 TRECresultFileFormat = (tmp == "trec" || tmp == "1");
00088
00089
00090 fbDocCount = lemur::api::ParamGetInt("feedbackDocCount",0);
00091 resultCount = lemur::api::ParamGetInt("resultCount", 1000);
00092
00093 }
00094 }
00095
00097 namespace TFIDFParameter {
00098
00100
00101 static WeightParam docTFPrm;
00102 static WeightParam qryTFPrm;
00103 static FeedbackParam fbPrm;
00105
00106 static void get()
00107 {
00108 string tfmethod = RetrievalParameter::getLower("doc.tfMethod", "bm25");
00109 if ((tfmethod == "rawtf") || (tfmethod == "0")) docTFPrm.tf = RAWTF;
00110 else if ((tfmethod == "logf") || (tfmethod == "1")) docTFPrm.tf = LOGTF;
00111 else if ((tfmethod == "bm25") || (tfmethod == "2")) docTFPrm.tf = BM25;
00112
00113 docTFPrm.bm25K1 = lemur::api::ParamGetDouble("doc.bm25K1",defaultDocK1);
00114 docTFPrm.bm25B = lemur::api::ParamGetDouble("doc.bm25B",defaultDocB);
00115
00116 tfmethod = RetrievalParameter::getLower("query.tfMethod", "bm25");
00117 if ((tfmethod == "rawtf") || (tfmethod == "0")) qryTFPrm.tf = RAWTF;
00118 else if ((tfmethod == "logf") || (tfmethod == "1")) qryTFPrm.tf = LOGTF;
00119 else if ((tfmethod == "bm25") || (tfmethod == "2")) qryTFPrm.tf = BM25;
00120
00121 qryTFPrm.bm25K1 = lemur::api::ParamGetDouble("query.bm25K1",defaultQryK1);
00122 qryTFPrm.bm25B = defaultQryB;
00123
00124 fbPrm.howManyTerms = lemur::api::ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00125 fbPrm.posCoeff = lemur::api::ParamGetDouble("feedbackPosCoeff", defaultPosCoeff);
00126 }
00127 }
00128
00130 namespace OkapiParameter {
00131
00133
00134 static TFParam tfPrm;
00135 static FeedbackParam fbPrm;
00137
00138
00139 static void get()
00140 {
00141 tfPrm.k1 = lemur::api::ParamGetDouble("BM25K1",defaultK1);
00142 tfPrm.b = lemur::api::ParamGetDouble("BM25B",defaultB);
00143 tfPrm.k3 = lemur::api::ParamGetDouble("BM25K3", defaultK3);
00144 fbPrm.expQTF = lemur::api::ParamGetDouble("BM25QTF", defaultExpQTF);
00145 fbPrm.howManyTerms = lemur::api::ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00146
00147 }
00148 }
00150 namespace SimpleKLParameter {
00152
00153 static SimpleKLParameter::DocSmoothParam docPrm;
00154 static SimpleKLParameter::QueryModelParam qryPrm;
00155 static lemur::utility::String smoothSupportFile;
00157
00158 static void get()
00159 {
00160 smoothSupportFile = lemur::api::ParamGetString("smoothSupportFile", "");
00161
00162 string tmpString = RetrievalParameter::getLower("adjustedScoreMethod",
00163 "negativekld");
00164 if (tmpString == "querylikelihood" || tmpString == "ql") {
00165 qryPrm.adjScoreMethod = SimpleKLParameter::QUERYLIKELIHOOD;
00166 } else if (tmpString == "crossentropy" ||tmpString == "ce") {
00167 qryPrm.adjScoreMethod = SimpleKLParameter::CROSSENTROPY;
00168 } else if (tmpString == "negativekld" || tmpString == "-d") {
00169 qryPrm.adjScoreMethod = SimpleKLParameter::NEGATIVEKLD;
00170 } else {
00171 cerr << "Unknown scoreMethod " << tmpString << ". Using NEGATIVEKLD"
00172 << endl;
00173 qryPrm.adjScoreMethod = SimpleKLParameter::NEGATIVEKLD;
00174 }
00175
00176 tmpString = RetrievalParameter::getLower("smoothMethod",
00177 "dirichletprior");
00178 if (tmpString == "jelinikmercer" || tmpString == "jm" || tmpString == "0")
00179 docPrm.smthMethod = SimpleKLParameter::JELINEKMERCER;
00180 else if (tmpString == "dirichletprior" || tmpString == "dir" ||
00181 tmpString == "1")
00182 docPrm.smthMethod = SimpleKLParameter::DIRICHLETPRIOR;
00183 else if (tmpString == "absolutediscount" || tmpString == "ad" ||
00184 tmpString == "2")
00185 docPrm.smthMethod = SimpleKLParameter::ABSOLUTEDISCOUNT;
00186 else if (tmpString == "twostage" || tmpString == "2s" || tmpString == "3")
00187 docPrm.smthMethod = SimpleKLParameter::TWOSTAGE;
00188 else {
00189 cerr << "Unknown smoothMethod " << tmpString << ". Using DIRICHLET"
00190 << endl;
00191 docPrm.smthMethod = SimpleKLParameter::defaultSmoothMethod;
00192 }
00193
00194
00195 tmpString = RetrievalParameter::getLower("smoothStrategy", "interpolate");
00196 if (tmpString == "interpolate" || tmpString == "int" || tmpString == "0")
00197 docPrm.smthStrategy= SimpleKLParameter::INTERPOLATE;
00198 else if (tmpString == "backoff" || tmpString == "bo" || tmpString == "1")
00199 docPrm.smthStrategy= SimpleKLParameter::BACKOFF;
00200 else {
00201 cerr << "Unknown smoothStrategy " << tmpString << ". Using INTERPOLATE"
00202 << endl;
00203 docPrm.smthStrategy= SimpleKLParameter::defaultSmoothStrategy;
00204 }
00205
00206
00207 docPrm.ADDelta = lemur::api::ParamGetDouble("discountDelta",defaultADDelta);
00208 docPrm.JMLambda = lemur::api::ParamGetDouble("JelinekMercerLambda",defaultJMLambda);
00209 docPrm.DirPrior = lemur::api::ParamGetDouble("DirichletPrior",defaultDirPrior);
00210
00211 tmpString = RetrievalParameter::getLower("queryUpdateMethod", "mixture");
00212
00213 if (tmpString == "mixture" || tmpString == "mix" || tmpString == "0")
00214 qryPrm.fbMethod = SimpleKLParameter::MIXTURE;
00215 else if (tmpString == "divmin" || tmpString == "div" || tmpString == "1")
00216 qryPrm.fbMethod = SimpleKLParameter::DIVMIN;
00217 else if (tmpString == "markovchain" || tmpString == "mc" ||
00218 tmpString == "2")
00219 qryPrm.fbMethod = SimpleKLParameter::MARKOVCHAIN;
00220 else if (tmpString == "relevancemodel1" || tmpString == "rm1" ||
00221 tmpString == "3")
00222 qryPrm.fbMethod = SimpleKLParameter::RM1;
00223 else if (tmpString == "relevancemodel2" || tmpString == "rm2" ||
00224 tmpString == "4")
00225 qryPrm.fbMethod = SimpleKLParameter::RM2;
00226 else {
00227 cerr << "Unknown queryUpdateMethod " << tmpString
00228 << ". Using MIXTURE"
00229 << endl;
00230 qryPrm.fbMethod = SimpleKLParameter::MIXTURE;
00231 }
00232
00233
00234 qryPrm.fbCoeff = lemur::api::ParamGetDouble("feedbackCoefficient", defaultFBCoeff);
00235 qryPrm.fbPrTh = lemur::api::ParamGetDouble("feedbackProbThresh", defaultFBPrTh);
00236 qryPrm.fbPrSumTh = lemur::api::ParamGetDouble("feedbackProbSumThresh",
00237 defaultFBPrSumTh);
00238 qryPrm.fbTermCount = lemur::api::ParamGetInt("feedbackTermCount", defaultFBTermCount);
00239 qryPrm.fbMixtureNoise = lemur::api::ParamGetDouble("feedbackMixtureNoise",
00240 defaultFBMixNoise);
00241 qryPrm.emIterations = lemur::api::ParamGetInt("emIterations", defaultEMIterations);
00242
00243 }
00244 }
00246 namespace CORIParameter {
00247 static lemur::utility::String collectionCounts;
00248 static double cstffactor;
00249 static double cstfbaseline;
00250 static double doctffactor;
00251 static double doctfbaseline;
00252 static void get() {
00253 collectionCounts = lemur::api::ParamGetString("collCounts", "USE_INDEX_COUNTS");
00254 cstffactor = lemur::api::ParamGetDouble("CSCTF_factor", 150);
00255 cstfbaseline = lemur::api::ParamGetDouble("CSCTF_baseline", 50);
00256 doctffactor = lemur::api::ParamGetDouble("DOCCTF_factor", 1.5);
00257 doctfbaseline = lemur::api::ParamGetDouble("DOCCTF_baseline", 0.5);
00258 }
00259 }
00261 namespace CosSimParameter {
00262
00264
00265 static FeedbackParam fbPrm;
00266 static lemur::utility::String L2NormFile;
00268
00269 static void get()
00270 {
00271 fbPrm.howManyTerms = lemur::api::ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00272 fbPrm.posCoeff = lemur::api::ParamGetDouble("feedbackPosCoeff", defaultPosCoeff);
00273 L2NormFile = lemur::api::ParamGetString("L2File", defaultL2File);
00274 }
00275 }
00277 namespace InQueryParameter {
00280 static double fbCoeff = 0.5;
00282 static int fbTermCount = 50;
00284 static double defaultBelief = 0.4;
00286 static bool cacheIDF = false;
00287 static void get()
00288 {
00289 defaultBelief = lemur::api::ParamGetDouble("defaultBelief", defaultBelief);
00290 fbCoeff = lemur::api::ParamGetDouble("feedbackPosCoeff", fbCoeff);
00291 fbTermCount = lemur::api::ParamGetInt("feedbackTermCount", fbTermCount);
00292 string tmpString = RetrievalParameter::getLower("cacheIDF", "true");
00293 cacheIDF = (tmpString == "true" || tmpString == "1");
00294 }
00295 }
00296
00298 namespace IndriParameter {
00301 static indri::api::Parameters params;
00303 static string stopwords;
00305 static void get()
00306 {
00307 RetrievalParameter::get();
00308
00309 stopwords = lemur::api::ParamGetString("stopwords", "");
00310
00311
00312 int fbTerms = lemur::api::ParamGetInt("feedbackTermCount", 10);
00313 double fbOrigWt = lemur::api::ParamGetDouble("fbOrigWt", 0.5);
00314 double fbMu = lemur::api::ParamGetDouble("fbMu", 0);
00315 params.set( "fbDocs" , RetrievalParameter::fbDocCount);
00316 params.set( "fbTerms" , fbTerms );
00317 params.set( "fbOrigWt", fbOrigWt);
00318 params.set( "fbMu", fbMu );
00319
00320 string rule = lemur::api::ParamGetString("rule", "method:dirichlet,mu:2500");
00321 params.set("rule", rule);
00322
00323 params.set( "count", RetrievalParameter::resultCount );
00324 }
00325 }
00326
00327 #endif