00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef INDRI_QUERYEXPANDER_HPP
00019 #define INDRI_QUERYEXPANDER_HPP
00020
00021 #include <string>
00022 #include <vector>
00023 #include <map>
00024
00025 #include "indri/QueryEnvironment.hpp"
00026 #include "indri/Parameters.hpp"
00027 namespace indri
00028 {
00029 namespace query
00030 {
00031
/**
 * Comparison functor for (term, weight) pairs.
 *
 * Returns true when the first pair's weight (its .second) is strictly
 * greater than the second pair's weight, so sorting a range with this
 * functor puts the largest weights first.  The term string (.first) takes
 * no part in the comparison; pairs with equal weights compare as
 * equivalent.
 */
struct QueryExpanderSort {
  bool operator() ( const std::pair<std::string, double>& lhs,
                    const std::pair<std::string, double>& rhs ) const {
    // Strict '>' (not '>=') keeps this a valid strict weak ordering.
    return lhs.second > rhs.second;
  }
};
00038
00039 class QueryExpander {
00040 private:
00041 std::map<std::string, UINT64> _cf_cache;
00042
00043 protected:
00044 indri::api::QueryEnvironment * _env;
00045 indri::api::Parameters _param;
00046
00047 std::map< std::string, bool > _stopwords;
00048
00049 std::vector<indri::api::DocumentVector*> getDocumentVectors( std::vector<indri::api::ScoredExtentResult>& results, int rmDocs );
00050 std::vector<std::string> * getVocabulary( std::vector<indri::api::ScoredExtentResult>& results, int rmDocs );
00051 std::vector<std::string> * getVocabulary( std::vector<indri::api::DocumentVector*>& docVectors );
00052 std::string buildQuery( const std::string& originalQuery, double originalWeight,
00053 const std::vector< std::pair<std::string, double> >& expansionTerms,
00054 int termCount );
00055 UINT64 getCF( const std::string& term );
00056
00057 public:
00058 QueryExpander( indri::api::QueryEnvironment * env , indri::api::Parameters& param );
00059 virtual ~QueryExpander() {};
00060
00061
00062 std::vector<indri::api::ScoredExtentResult> runExpandedQuery( std::string originalQuery , int resultsRequested , bool verbose = false );
00063
00064
00065 virtual std::string expand( std::string originalQuery , std::vector<indri::api::ScoredExtentResult>& results ) = 0;
00066 };
00067 }
00068 }
00069
00070
00071 #endif