Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

RetParamManager.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 #ifndef _RETRIEVALPARAMETER_HPP
00013 #define _RETRIEVALPARAMETER_HPP
00014 
00016 #include "Param.hpp"
00017 
00018 #include "TFIDFRetMethod.hpp"
00019 #include "OkapiRetMethod.hpp"
00020 #include "SimpleKLRetMethod.hpp"
00021 #include "CORIRetMethod.hpp"
00022 #include "CosSimRetMethod.hpp"
00023 #include "InQueryRetMethod.hpp"
00024 #include "IndriRetMethod.hpp"
00025 
00027 namespace RetrievalParameter {
00028 
00030 
00031 
00032   static lemur::utility::String databaseIndex;
00034   static lemur::utility::String retModel;
00036   static lemur::utility::String textQuerySet;
00038   static lemur::utility::String resultFile;
00040   static bool TRECresultFileFormat;
00042   static int fbDocCount;
00044   static int resultCount;
00046   static bool cacheDocReps;
00048   static bool useWorkingSet;
00050   static lemur::utility::String workSetFile;
00051 
00053 
00054   static string getLower(const char *parm, const char *def) {
00055     std::string tmpString = lemur::api::ParamGetString(parm, def);
00056     // make it all lowercase
00057     for (int i = 0; i < tmpString.length(); i++)
00058       tmpString[i] = tolower(tmpString[i]);
00059     return tmpString;
00060   }
00061 
00062   static void get() {
00063     databaseIndex = lemur::api::ParamGetString("index","");
00064     retModel = getLower("retModel","kl");
00065     // backwards compatibility.
00066     if (retModel == "0") retModel = "tfidf";
00067     if (retModel == "1") retModel = "okapi";
00068     if (retModel == "2") retModel = "kl";
00069     if (retModel == "3") retModel = "inquery";
00070     if (retModel == "4") retModel = "cori_cs";
00071     if (retModel == "5") retModel = "cos";
00072     if (retModel == "6") retModel = "inq_struct";
00073     if (retModel == "7") retModel = "indri";
00074 
00075     string tmp = getLower("cacheDocReps", "true");
00076     cacheDocReps = (tmp == "true" || tmp == "1");
00077 
00078     tmp = getLower("useWorkingSet", "false"); 
00079     useWorkingSet = (tmp == "true" || tmp == "1");
00080     // working set file name
00081     workSetFile = lemur::api::ParamGetString("workingSetFile",""); 
00082     
00083     textQuerySet = lemur::api::ParamGetString("textQuery","");
00084     resultFile = lemur::api::ParamGetString("resultFile","");
00085 
00086     tmp = getLower("resultFormat","trec");
00087     TRECresultFileFormat = (tmp == "trec" || tmp == "1");
00088 
00089     // default being no feedback
00090     fbDocCount = lemur::api::ParamGetInt("feedbackDocCount",0); 
00091     resultCount = lemur::api::ParamGetInt("resultCount", 1000); 
00092     
00093   }
00094 }
00095 
00097 namespace TFIDFParameter {
00098 
00100 
00101   static WeightParam docTFPrm;
00102   static WeightParam qryTFPrm;
00103   static FeedbackParam fbPrm;
00105   
00106   static void get()
00107   {
00108     string tfmethod = RetrievalParameter::getLower("doc.tfMethod", "bm25");
00109     if ((tfmethod == "rawtf") || (tfmethod == "0")) docTFPrm.tf = RAWTF;
00110     else if ((tfmethod == "logf") || (tfmethod == "1")) docTFPrm.tf = LOGTF;    
00111     else if ((tfmethod == "bm25") || (tfmethod == "2")) docTFPrm.tf = BM25;
00112 
00113     docTFPrm.bm25K1 = lemur::api::ParamGetDouble("doc.bm25K1",defaultDocK1);
00114     docTFPrm.bm25B = lemur::api::ParamGetDouble("doc.bm25B",defaultDocB);
00115 
00116     tfmethod = RetrievalParameter::getLower("query.tfMethod", "bm25");
00117     if ((tfmethod == "rawtf") || (tfmethod == "0")) qryTFPrm.tf = RAWTF;
00118     else if ((tfmethod == "logf") || (tfmethod == "1")) qryTFPrm.tf = LOGTF;    
00119     else if ((tfmethod == "bm25") || (tfmethod == "2")) qryTFPrm.tf = BM25;
00120 
00121     qryTFPrm.bm25K1 = lemur::api::ParamGetDouble("query.bm25K1",defaultQryK1);
00122     qryTFPrm.bm25B = defaultQryB;
00123     
00124     fbPrm.howManyTerms = lemur::api::ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00125     fbPrm.posCoeff = lemur::api::ParamGetDouble("feedbackPosCoeff", defaultPosCoeff); 
00126   }
00127 }
00128 
00130 namespace OkapiParameter {
00131 
00133 
00134   static TFParam tfPrm;
00135   static FeedbackParam fbPrm;
00137 
00138 
00139   static void get()
00140   {
00141     tfPrm.k1 = lemur::api::ParamGetDouble("BM25K1",defaultK1);
00142     tfPrm.b =  lemur::api::ParamGetDouble("BM25B",defaultB);
00143     tfPrm.k3 = lemur::api::ParamGetDouble("BM25K3", defaultK3);
00144     fbPrm.expQTF = lemur::api::ParamGetDouble("BM25QTF", defaultExpQTF);
00145     fbPrm.howManyTerms = lemur::api::ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00146     
00147   }
00148 }
00150 namespace SimpleKLParameter {
00152 
00153   static SimpleKLParameter::DocSmoothParam docPrm;
00154   static SimpleKLParameter::QueryModelParam qryPrm;
00155   static lemur::utility::String smoothSupportFile;
00157     
00158   static void get()
00159   {
00160     smoothSupportFile = lemur::api::ParamGetString("smoothSupportFile", "");
00161 
00162     string tmpString = RetrievalParameter::getLower("adjustedScoreMethod", 
00163                                                     "negativekld");
00164     if (tmpString == "querylikelihood" || tmpString == "ql") {
00165       qryPrm.adjScoreMethod = SimpleKLParameter::QUERYLIKELIHOOD;
00166     } else if (tmpString == "crossentropy" ||tmpString == "ce") {
00167       qryPrm.adjScoreMethod = SimpleKLParameter::CROSSENTROPY;
00168     } else if (tmpString == "negativekld" || tmpString == "-d") {
00169       qryPrm.adjScoreMethod = SimpleKLParameter::NEGATIVEKLD;
00170     } else {
00171       cerr << "Unknown scoreMethod " << tmpString << ". Using NEGATIVEKLD" 
00172            << endl;
00173       qryPrm.adjScoreMethod = SimpleKLParameter::NEGATIVEKLD;
00174     }
00175 
00176     tmpString = RetrievalParameter::getLower("smoothMethod", 
00177                                              "dirichletprior");
00178     if (tmpString == "jelinikmercer" || tmpString == "jm" || tmpString == "0")
00179       docPrm.smthMethod = SimpleKLParameter::JELINEKMERCER;
00180     else if (tmpString == "dirichletprior" || tmpString == "dir" || 
00181              tmpString == "1")
00182       docPrm.smthMethod = SimpleKLParameter::DIRICHLETPRIOR;
00183     else if (tmpString == "absolutediscount" || tmpString == "ad" || 
00184              tmpString == "2")
00185       docPrm.smthMethod = SimpleKLParameter::ABSOLUTEDISCOUNT;
00186     else if (tmpString == "twostage" || tmpString == "2s" || tmpString == "3")
00187       docPrm.smthMethod = SimpleKLParameter::TWOSTAGE;
00188     else {
00189       cerr << "Unknown smoothMethod " << tmpString << ". Using DIRICHLET" 
00190            << endl;
00191       docPrm.smthMethod = SimpleKLParameter::defaultSmoothMethod;
00192     }
00193     
00194 
00195     tmpString = RetrievalParameter::getLower("smoothStrategy", "interpolate");
00196     if (tmpString == "interpolate" || tmpString == "int" || tmpString == "0")
00197       docPrm.smthStrategy= SimpleKLParameter::INTERPOLATE;
00198     else if (tmpString == "backoff" || tmpString == "bo" || tmpString == "1")
00199       docPrm.smthStrategy= SimpleKLParameter::BACKOFF;
00200     else {
00201       cerr << "Unknown smoothStrategy " << tmpString << ". Using INTERPOLATE" 
00202            << endl;
00203       docPrm.smthStrategy= SimpleKLParameter::defaultSmoothStrategy;
00204     }
00205     
00206 
00207     docPrm.ADDelta = lemur::api::ParamGetDouble("discountDelta",defaultADDelta);
00208     docPrm.JMLambda = lemur::api::ParamGetDouble("JelinekMercerLambda",defaultJMLambda);
00209     docPrm.DirPrior = lemur::api::ParamGetDouble("DirichletPrior",defaultDirPrior);
00210     
00211     tmpString = RetrievalParameter::getLower("queryUpdateMethod", "mixture");
00212 
00213     if (tmpString == "mixture" || tmpString == "mix" || tmpString == "0")
00214       qryPrm.fbMethod = SimpleKLParameter::MIXTURE;
00215     else if (tmpString == "divmin" || tmpString == "div" || tmpString == "1")
00216       qryPrm.fbMethod = SimpleKLParameter::DIVMIN;
00217     else if (tmpString == "markovchain" || tmpString == "mc" || 
00218              tmpString == "2")
00219       qryPrm.fbMethod = SimpleKLParameter::MARKOVCHAIN;
00220     else if (tmpString == "relevancemodel1" || tmpString == "rm1" || 
00221              tmpString == "3")
00222       qryPrm.fbMethod = SimpleKLParameter::RM1;
00223     else if (tmpString == "relevancemodel2" || tmpString == "rm2" || 
00224              tmpString == "4")
00225       qryPrm.fbMethod = SimpleKLParameter::RM2;
00226     else {
00227       cerr << "Unknown queryUpdateMethod " << tmpString 
00228            << ". Using MIXTURE" 
00229            << endl;
00230       qryPrm.fbMethod = SimpleKLParameter::MIXTURE;
00231     }
00232     
00233 
00234     qryPrm.fbCoeff = lemur::api::ParamGetDouble("feedbackCoefficient", defaultFBCoeff);
00235     qryPrm.fbPrTh = lemur::api::ParamGetDouble("feedbackProbThresh", defaultFBPrTh);
00236     qryPrm.fbPrSumTh = lemur::api::ParamGetDouble("feedbackProbSumThresh",
00237                                                   defaultFBPrSumTh);
00238     qryPrm.fbTermCount = lemur::api::ParamGetInt("feedbackTermCount", defaultFBTermCount);
00239     qryPrm.fbMixtureNoise = lemur::api::ParamGetDouble("feedbackMixtureNoise",
00240                                                        defaultFBMixNoise);
00241     qryPrm.emIterations = lemur::api::ParamGetInt("emIterations", defaultEMIterations);
00242                                               
00243   }
00244 }
00246 namespace CORIParameter {
00247   static lemur::utility::String collectionCounts;
00248   static double cstffactor;
00249   static double cstfbaseline;
00250   static double doctffactor;
00251   static double doctfbaseline;
00252   static void get() {
00253     collectionCounts = lemur::api::ParamGetString("collCounts", "USE_INDEX_COUNTS");
00254     cstffactor = lemur::api::ParamGetDouble("CSCTF_factor", 150);
00255     cstfbaseline = lemur::api::ParamGetDouble("CSCTF_baseline", 50);
00256     doctffactor = lemur::api::ParamGetDouble("DOCCTF_factor", 1.5);
00257     doctfbaseline = lemur::api::ParamGetDouble("DOCCTF_baseline", 0.5);
00258   }
00259 }
00261 namespace CosSimParameter {
00262 
00264 
00265   static FeedbackParam fbPrm;
00266   static lemur::utility::String L2NormFile;
00268   
00269   static void get()
00270   {
00271     fbPrm.howManyTerms = lemur::api::ParamGetInt("feedbackTermCount",defaultHowManyTerms);
00272     fbPrm.posCoeff = lemur::api::ParamGetDouble("feedbackPosCoeff", defaultPosCoeff); 
00273     L2NormFile = lemur::api::ParamGetString("L2File", defaultL2File);
00274   }
00275 }
00277 namespace InQueryParameter {
00280   static double fbCoeff = 0.5;
00282   static int fbTermCount = 50;
00284   static double defaultBelief = 0.4;
00286   static bool cacheIDF = false;
00287   static void get()
00288   {
00289     defaultBelief = lemur::api::ParamGetDouble("defaultBelief", defaultBelief);
00290     fbCoeff = lemur::api::ParamGetDouble("feedbackPosCoeff", fbCoeff);
00291     fbTermCount = lemur::api::ParamGetInt("feedbackTermCount", fbTermCount);
00292     string tmpString = RetrievalParameter::getLower("cacheIDF", "true");
00293     cacheIDF = (tmpString == "true" || tmpString == "1");
00294   }
00295 }
00296 
00298 namespace IndriParameter {
00301   static indri::api::Parameters params;
00303   static string stopwords;
00305   static void get()
00306   {
00307     RetrievalParameter::get();
00308     // stopwords
00309     stopwords = lemur::api::ParamGetString("stopwords", "");
00310 
00311     // RM expansion parameters
00312     int fbTerms = lemur::api::ParamGetInt("feedbackTermCount", 10);
00313     double fbOrigWt = lemur::api::ParamGetDouble("fbOrigWt", 0.5);
00314     double fbMu = lemur::api::ParamGetDouble("fbMu", 0);
00315     params.set( "fbDocs" , RetrievalParameter::fbDocCount);
00316     params.set( "fbTerms" , fbTerms );
00317     params.set( "fbOrigWt", fbOrigWt);
00318     params.set( "fbMu", fbMu );
00319     // set retrieval rules -- NB limit to one.
00320     string rule = lemur::api::ParamGetString("rule", "method:dirichlet,mu:2500");
00321     params.set("rule", rule);
00322     // results count
00323     params.set( "count", RetrievalParameter::resultCount );
00324   }
00325 }
00326 
00327 #endif

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4