00001 /*========================================================================== 00002 * Copyright (c) 2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 * 00011 * 26 Sep 2007 - mjh - added calls for field(...) and fieldInfoList(...) 00012 */ 00013 00014 00015 00016 #ifndef LEMURINDRI_INDEX_HPP 00017 #define LEMURINDRI_INDEX_HPP 00018 00019 /* 00020 * NAME DATE - COMMENTS 00021 * dmf 09/04 - Lemur Index API wrapper on Indri Repository 00022 */ 00023 00024 #include "Index.hpp" 00025 #include "indri/Repository.hpp" 00026 00027 namespace lemur 00028 { 00029 namespace index 00030 { 00031 00034 class LemurIndriIndex : public lemur::api::Index { 00035 public: 00037 LemurIndriIndex(); 00038 00039 virtual ~LemurIndriIndex(); 00040 00042 00043 00045 bool open(const std::string& indexName); 00046 00048 bool open( const char* indexName ) { 00049 return open(string(indexName)); 00050 } 00052 void close(); 00054 00056 00057 00059 lemur::api::TERMID_T term(const lemur::api::TERM_T &word) const; 00060 00062 const lemur::api::TERM_T term(lemur::api::TERMID_T termID) const; 00063 00065 lemur::api::DOCID_T document(const lemur::api::EXDOCID_T &docIDStr) const; 00066 00068 const lemur::api::EXDOCID_T document(lemur::api::DOCID_T docID) const; 00069 00071 00073 00074 00076 lemur::api::COUNT_T docCount() const; 00077 00079 lemur::api::COUNT_T termCountUnique() const; 00080 00082 lemur::api::COUNT_T termCount(lemur::api::TERMID_T termID) const; 00083 00085 lemur::api::COUNT_T termCount() const; 00086 00088 float docLengthAvg() const; 00089 00091 lemur::api::COUNT_T docCount(lemur::api::TERMID_T termID) const; 00092 00094 lemur::api::COUNT_T docLength( lemur::api::DOCID_T documentID ) const; 00095 00097 00099 lemur::api::DocInfoList* docInfoList(lemur::api::TERMID_T termID) const; 00100 00102 lemur::api::TermInfoList* termInfoList(lemur::api::DOCID_T docID) const; 00103 00105 lemur::api::TermInfoList* termInfoListSeq(lemur::api::DOCID_T docID) const; 00106 00107 const lemur::api::DocumentManager* docManager(lemur::api::DOCID_T docID) const { 00108 return _docMgr; 00109 } 00110 00111 const string &getRepositoryName() const {return _repositoryName;} 00112 00114 const int field(std::string fieldName) const; 00115 00117 const int field(const char *fieldName) const; 00118 00120 const std::string field(int fieldID) const; 00121 00124 virtual lemur::api::FieldInfoList *fieldInfoList(lemur::api::DOCID_T docID) const; 00125 00128 virtual lemur::api::FieldInfoList *fieldInfoList(lemur::api:: 00129 DOCID_T docID, int fieldID) const; 00130 00132 virtual std::vector<std::string> documentMetadata(const std::vector< lemur::api::DOCID_T > &documentIDs, const std::string &attributeName); 00133 00135 virtual std::vector<std::string> documentMetadata(lemur::api::DOCID_T documentID, const std::string &attributeName); 00136 00137 00138 private: 00139 indri::index::Index* _indexWithDocument( indri::collection::Repository::index_state& indexes, lemur::api::DOCID_T documentID ) const; 00140 indri::collection::Repository *_repository; 00141 string _repositoryName; 00142 lemur::api::DocumentManager *_docMgr; 00143 }; 00144 } 00145 } 00146 00147 #endif // LEMURINDRI_INDEX_HPP 00148