00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 * 00011 * 26 Sep 2007 - mjh - added call for fieldInfoList 00012 */ 00013 00014 00015 #ifndef _INDEX_HPP 00016 #define _INDEX_HPP 00017 // C. Zhai 02/08/2001 00018 00019 #include "TermInfoList.hpp" 00020 #include "FieldInfoList.hpp" 00021 #include "DocInfoList.hpp" 00022 #include "DocumentManager.hpp" 00023 #include "CollectionProps.hpp" 00024 #include "lemur-platform.h" 00025 #include "lemur-compat.hpp" 00026 00027 namespace lemur 00028 { 00029 namespace api 00030 { 00031 00033 00071 class Index { 00072 public: 00073 00074 virtual ~Index() {}; 00075 00077 00078 00080 virtual bool open(const string &indexName)=0; 00082 00084 00085 00087 virtual TERMID_T term (const TERM_T &word) const=0; 00088 00090 virtual const TERM_T term (TERMID_T termID) const=0; 00091 00093 virtual DOCID_T document (const EXDOCID_T &docIDStr) const=0; 00094 00096 virtual const EXDOCID_T document (DOCID_T docID) const=0; 00097 00100 // virtual const char* docManager(int docID) { return NULL;} 00101 virtual const DocumentManager* docManager(DOCID_T docID) const {return NULL;} 00102 00104 virtual const int field(std::string fieldName) const { return 0; } 00106 virtual const int field(const char *fieldName) const { return 0; } 00107 00109 virtual const std::string field(int fieldID) const { return ""; } 00110 00112 00115 virtual const string termLexiconID() const { return "";} 00116 00118 00120 00121 00123 virtual COUNT_T docCount () const=0; 00124 00126 virtual COUNT_T termCountUnique () const=0; 00127 00129 virtual COUNT_T termCount (TERMID_T termID) const=0; 00130 00132 virtual COUNT_T termCount () const=0; 00133 00135 virtual float docLengthAvg() const=0; 00136 00138 virtual COUNT_T docCount(TERMID_T termID) const=0; 00139 00141 virtual COUNT_T docLength (DOCID_T docID) const=0; 00142 00144 00146 00147 00148 virtual DocInfoList *docInfoList(TERMID_T termID) const=0; 00149 00151 virtual TermInfoList *termInfoList(DOCID_T docID) const=0; 00152 00155 virtual FieldInfoList *fieldInfoList(DOCID_T docID) const { return NULL; } 00156 00159 virtual FieldInfoList *fieldInfoList(DOCID_T docID, int fieldID) const { return NULL; } 00160 00162 00164 00165 00167 virtual std::vector<std::string> documentMetadata(const std::vector< lemur::api::DOCID_T > &documentIDs, const std::string &attributeName) { std::vector<std::string> _blankVector; _blankVector.clear(); return _blankVector; } 00168 00170 virtual std::vector<std::string> documentMetadata(lemur::api::DOCID_T documentID, const std::string &attributeName) { std::vector<std::string> _blankVector; _blankVector.clear(); return _blankVector; } 00171 00173 00174 // returns TermInfoList is sequential representation (not bag of words) 00175 // return NULL list when sequence is not available. 00176 virtual TermInfoList *termInfoListSeq(DOCID_T docID) const { return NULL; } 00177 00180 virtual const lemur::parse::CollectionProps* collectionProps() const { return NULL; } 00181 }; 00182 } 00183 } 00184 00185 00186 #endif 00187 00188 00189