00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef INDRI_QUERYENVIRONMENT_HPP
00020 #define INDRI_QUERYENVIRONMENT_HPP
00021
00022 #include <map>
00023 #include "indri/ScoredExtentResult.hpp"
00024 #include "indri/QueryServer.hpp"
00025 #include "indri/NetworkStream.hpp"
00026 #include "indri/NetworkMessageStream.hpp"
00027 #include "indri/Parameters.hpp"
00028 #include "indri/ParsedDocument.hpp"
00029 #include "indri/Repository.hpp"
00030 #include "indri/QueryAnnotation.hpp"
00031 #include "IndexTypes.hpp"
00032
00033 namespace indri
00034 {
00035 namespace api
00036 {
00043 typedef struct QueryRequest
00044 {
00047 enum Options {
00049 HTMLSnippet = 1,
00051 TextSnippet = 2
00052 };
00054 std::string query;
00056 std::vector<std::string> formulators;
00058 std::vector<std::string> metadata;
00060 std::vector<lemur::api::DOCID_T> docSet;
00062 int resultsRequested;
00064 int startNum;
00066 enum Options options;
00067 } QueryRequest;
00068
/// One metadata entry expressed as a key string and a value string.
/// QueryResult::metadata holds a vector of these.
typedef struct MetadataPair
{
  /// Key of the entry; presumably the metadata field name -- confirm.
  std::string key;

  /// Value stored under key.
  std::string value;
} MetadataPair;
00078
00085 typedef struct QueryResult
00086 {
00088 std::string snippet;
00090 std::string documentName;
00092 lemur::api::DOCID_T docid;
00094 double score;
00096 int begin;
00098 int end;
00100 std::vector<indri::api::MetadataPair> metadata;
00101 } QueryResult;
00102
00108 typedef struct QueryResults
00109 {
00111 float parseTime;
00113 float executeTime;
00115 float documentsTime;
00117 int estimatedMatches;
00119 std::vector<QueryResult> results;
00120 } QueryResults;
00121
00128 class QueryEnvironment {
00129 private:
00130
00131
00132 std::map<std::string, std::pair<indri::server::QueryServer *, indri::net::NetworkStream *> > _serverNameMap;
00133 std::vector<indri::server::QueryServer*> _servers;
00134
00135
00136 std::map<std::string, std::pair<indri::server::QueryServer *, indri::collection::Repository *> > _repositoryNameMap;
00137 std::vector<indri::collection::Repository*> _repositories;
00138 std::vector<indri::net::NetworkStream*> _streams;
00139 std::vector<indri::net::NetworkMessageStream*> _messageStreams;
00140
00141 Parameters _parameters;
00142 bool _baseline;
00143
00144 void _mergeQueryResults( indri::infnet::InferenceNetwork::MAllResults& results, std::vector<indri::server::QueryServerResponse*>& responses );
00145 void _copyStatistics( std::vector<indri::lang::RawScorerNode*>& scorerNodes, indri::infnet::InferenceNetwork::MAllResults& statisticsResults );
00146
00147 std::vector<indri::server::QueryServerResponse*> _runServerQuery( std::vector<indri::lang::Node*>& roots, int resultsRequested );
00148 void _sumServerQuery( indri::infnet::InferenceNetwork::MAllResults& results, std::vector<indri::lang::Node*>& roots, int resultsRequested );
00149 void _mergeServerQuery( indri::infnet::InferenceNetwork::MAllResults& results, std::vector<indri::lang::Node*>& roots, int resultsRequested );
00150 void _annotateQuery( indri::infnet::InferenceNetwork::MAllResults& results, const std::vector<lemur::api::DOCID_T>& documentIDs, std::string& annotatorName, indri::lang::Node* queryRoot );
00151
00152 std::vector<indri::api::ScoredExtentResult> _runQuery( indri::infnet::InferenceNetwork::MAllResults& results,
00153 const std::string& q,
00154 int resultsRequested,
00155 const std::vector<lemur::api::DOCID_T>* documentIDs,
00156 QueryAnnotation** annotation,
00157 const std::string &queryType = "indri" );
00158 void _scoredQuery( indri::infnet::InferenceNetwork::MAllResults& results, indri::lang::Node* queryRoot, std::string& accumulatorName, int resultsRequested, const std::vector<lemur::api::DOCID_T>* documentSet );
00159
00160 QueryEnvironment( QueryEnvironment& other ) {}
00161
00162 public:
00163 QueryEnvironment();
00164 ~QueryEnvironment();
00167 void setMemory( UINT64 memory );
00170 void setBaseline(const std::string &baseline);
00171 void setSingleBackgroundModel( bool background );
00174 void setScoringRules( const std::vector<std::string>& rules );
00177 void setStopwords( const std::vector<std::string>& stopwords );
00180 void addServer( const std::string& hostname );
00183 void addIndex( const std::string& pathname );
00187 void addIndex( class IndexEnvironment& environment );
00189 void close();
00192 void removeServer( const std::string& hostname );
00195 void removeIndex( const std::string& pathname );
00196
00200 QueryResults runQuery(QueryRequest &request);
00201
00206 std::vector<indri::api::ScoredExtentResult> runQuery( const std::string& query, int resultsRequested, const std::string &queryType = "indri" );
00207
00213 std::vector<indri::api::ScoredExtentResult> runQuery( const std::string& query, const std::vector<lemur::api::DOCID_T>& documentSet, int resultsRequested, const std::string &queryType = "indri" );
00214
00219 QueryAnnotation* runAnnotatedQuery( const std::string& query, int resultsRequested, const std::string &queryType = "indri" );
00220
00226 QueryAnnotation* runAnnotatedQuery( const std::string& query, const std::vector<lemur::api::DOCID_T>& documentSet, int resultsRequested, const std::string &queryType = "indri" );
00227
00228
00233 std::vector<indri::api::ParsedDocument*> documents( const std::vector<lemur::api::DOCID_T>& documentIDs );
00238 std::vector<indri::api::ParsedDocument*> documents( const std::vector<indri::api::ScoredExtentResult>& results );
00243 std::vector<std::string> documentMetadata( const std::vector<lemur::api::DOCID_T>& documentIDs, const std::string& attributeName );
00248 std::vector<std::string> documentMetadata( const std::vector<indri::api::ScoredExtentResult>& documentIDs, const std::string& attributeName );
00249
00253 std::vector<std::string> pathNames( const std::vector<indri::api::ScoredExtentResult>& results );
00254
00255
00260 std::vector<indri::api::ParsedDocument*> documentsFromMetadata( const std::string& attributeName, const std::vector<std::string>& attributeValues );
00261
00266 std::vector<lemur::api::DOCID_T> documentIDsFromMetadata( const std::string& attributeName, const std::vector<std::string>& attributeValue );
00267
00270 INT64 termCount();
00274 INT64 termCount( const std::string& term );
00278 INT64 stemCount( const std::string& term );
00284 INT64 termFieldCount( const std::string& term, const std::string& field );
00290 INT64 stemFieldCount( const std::string& term, const std::string& field );
00293 double expressionCount( const std::string& expression,
00294 const std::string &queryType = "indri" );
00300 std::vector<ScoredExtentResult> expressionList( const std::string& expression,
00301 const std::string& queryType = "indri" );
00304 std::vector<std::string> fieldList();
00307 INT64 documentCount();
00311 INT64 documentCount( const std::string& term );
00312
00316 INT64 documentStemCount( const std::string& stem );
00317
00321 int documentLength(lemur::api::DOCID_T documentID);
00322
00327 std::vector<DocumentVector*> documentVectors( const std::vector<lemur::api::DOCID_T>& documentIDs );
00328
00332 void setMaxWildcardTerms(int maxTerms);
00333 const std::vector<indri::server::QueryServer*>& getServers() const { return _servers;
00334 }
00335
00336 };
00337 }
00338 }
00339
00340 #endif // INDRI_QUERYENVIRONMENT_HPP
00341