|
Public Types |
enum | { MAX_DOCLENGTHS_CACHE = 20*1024*1024 } |
Public Member Functions |
| DiskIndex () |
void | open (const std::string &base, const std::string &relative) |
void | close () |
const std::string & | path () |
lemur::api::DOCID_T | documentBase () |
int | field (const char *fieldName) |
int | field (const std::string &fieldName) |
std::string | field (int fieldID) |
lemur::api::TERMID_T | term (const char *term) |
lemur::api::TERMID_T | term (const std::string &term) |
std::string | term (lemur::api::TERMID_T termID) |
int | documentLength (lemur::api::DOCID_T documentID) |
UINT64 | documentCount () |
UINT64 | documentCount (const std::string &term) |
lemur::api::DOCID_T | documentMaximum () |
| The documentMaximum is at least one greater than the largest documentID used in this index.
|
UINT64 | uniqueTermCount () |
UINT64 | termCount (const std::string &term) |
UINT64 | termCount () |
UINT64 | fieldTermCount (const std::string &field) |
UINT64 | fieldTermCount (const std::string &field, const std::string &term) |
UINT64 | fieldDocumentCount (const std::string &field) |
UINT64 | fieldDocumentCount (const std::string &field, const std::string &term) |
DocListIterator * | docListIterator (lemur::api::TERMID_T termID) |
DocListIterator * | docListIterator (const std::string &term) |
DocListFileIterator * | docListFileIterator () |
DocExtentListIterator * | fieldListIterator (int fieldID) |
DocExtentListIterator * | fieldListIterator (const std::string &field) |
const TermList * | termList (lemur::api::DOCID_T documentID) |
TermListFileIterator * | termListFileIterator () |
VocabularyIterator * | vocabularyIterator () |
VocabularyIterator * | frequentVocabularyIterator () |
VocabularyIterator * | infrequentVocabularyIterator () |
DocumentDataIterator * | documentDataIterator () |
indri::thread::Lockable * | iteratorLock () |
indri::thread::Lockable * | statisticsLock () |
Private Member Functions |
indri::index::DiskTermData * | _fetchTermData (lemur::api::TERMID_T termID) |
indri::index::DiskTermData * | _fetchTermData (const char *termString) |
void | _readManifest (const std::string &manifestPath) |
Private Attributes |
indri::thread::Mutex | _lock |
std::string | _path |
indri::file::BulkTreeReader | _frequentStringToTerm |
indri::file::BulkTreeReader | _infrequentStringToTerm |
indri::file::BulkTreeReader | _frequentIdToTerm |
indri::file::BulkTreeReader | _infrequentIdToTerm |
indri::file::File | _frequentTermsData |
indri::file::File | _documentLengths |
indri::file::File | _documentStatistics |
indri::file::File | _invertedFile |
indri::file::File | _directFile |
indri::file::File | _fieldsFile |
indri::file::SequentialReadBuffer | _lengthsBuffer |
std::vector< FieldStatistics > | _fieldData |
lemur::api::DOCID_T | _documentBase |
int | _infrequentTermBase |
CorpusStatistics | _corpusStatistics |