00001 /*========================================================================== 00002 * Copyright (c) 2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // 00014 // Index 00015 // 00016 // 15 November 2004 -- tds 00017 // 00018 00019 #ifndef INDRI_INDEX_HPP 00020 #define INDRI_INDEX_HPP 00021 00022 #include <string> 00023 #include <vector> 00024 00025 #include "indri/DocListIterator.hpp" 00026 #include "indri/DocExtentListIterator.hpp" 00027 #include "indri/DocListFileIterator.hpp" 00028 #include "indri/FieldListIterator.hpp" 00029 #include "indri/VocabularyIterator.hpp" 00030 #include "indri/TermList.hpp" 00031 #include "indri/TermListFileIterator.hpp" 00032 #include "indri/DocumentDataIterator.hpp" 00033 #include "indri/Lockable.hpp" 00034 #include "IndexTypes.hpp" 00035 00036 namespace indri { 00037 namespace index { 00038 00039 class Index { 00040 public: 00042 struct FieldDescription { 00044 std::string name; 00046 bool numeric; 00048 std::string parserName; 00050 bool ordinal; 00052 bool parental; 00053 }; 00054 00055 virtual ~Index() {}; 00056 00057 // 00058 // Actions 00059 // 00060 00061 virtual void close() = 0; 00062 00063 // 00064 // Counts 00065 // 00066 00067 virtual lemur::api::DOCID_T documentBase() = 0; 00069 virtual lemur::api::DOCID_T documentMaximum() = 0; 00070 00071 virtual lemur::api::TERMID_T term( const char* term ) = 0; 00072 virtual lemur::api::TERMID_T term( const std::string& term ) = 0; 00073 virtual std::string term( lemur::api::TERMID_T termID ) = 0; 00074 00075 virtual int field( const char* fieldName ) = 0; 00076 virtual int field( const std::string& fieldName ) = 0; 00077 virtual std::string field( int fieldID ) = 0; 00078 00079 virtual int documentLength( lemur::api::DOCID_T documentID ) = 0; 00080 virtual UINT64 documentCount() = 0; 00081 virtual UINT64 documentCount( const std::string& term ) = 0; 00082 00083 virtual UINT64 uniqueTermCount() = 0; 00084 00085 virtual UINT64 termCount( const std::string& term ) = 0; 00086 virtual UINT64 termCount() = 0; 00087 00088 virtual UINT64 fieldTermCount( const std::string& field ) = 0; 00089 virtual UINT64 fieldTermCount( const std::string& field, const std::string& term ) = 0; 00090 00091 virtual UINT64 fieldDocumentCount( const std::string& field ) = 0; 00092 virtual UINT64 fieldDocumentCount( const std::string& field, const std::string& term ) = 0; 00093 00094 // Lists 00095 virtual DocListIterator* docListIterator( lemur::api::TERMID_T termID ) = 0; 00096 virtual DocListIterator* docListIterator( const std::string& term ) = 0; 00097 virtual DocListFileIterator* docListFileIterator() = 0; 00098 virtual DocExtentListIterator* fieldListIterator( int fieldID ) = 0; 00099 virtual DocExtentListIterator* fieldListIterator( const std::string& field ) = 0; 00100 virtual const TermList* termList( lemur::api::DOCID_T documentID ) = 0; 00101 virtual TermListFileIterator* termListFileIterator() = 0; 00102 virtual DocumentDataIterator* documentDataIterator() = 0; 00103 00104 // Vocabulary 00105 virtual VocabularyIterator* frequentVocabularyIterator() = 0; 00106 virtual VocabularyIterator* infrequentVocabularyIterator() = 0; 00107 virtual VocabularyIterator* vocabularyIterator() = 0; 00108 00109 // Locks 00110 virtual indri::thread::Lockable* iteratorLock() = 0; 00111 virtual indri::thread::Lockable* statisticsLock() = 0; 00112 00113 }; 00114 } 00115 } 00116 00117 #endif // INDRI_INDEX_HPP 00118