#include <Index.hpp>
Inheritance diagram for lemur::api::Index:
Public Member Functions | |
virtual | ~Index () |
virtual TermInfoList * | termInfoListSeq (DOCID_T docID) const |
virtual const lemur::parse::CollectionProps * | collectionProps () const |
Open index | |
virtual bool | open (const string &indexName)=0 |
Open a previously created Index; returns true if opened successfully. indexName should be the full name of the table-of-contents file for the index, e.g., "index.bsc" for an index built with the basic indexer. | |
Spelling and index conversion | |
virtual TERMID_T | term (const TERM_T &word) const =0 |
Convert a term spelling to a termID, returns 0 if out of vocabulary. Valid index starts at 1. | |
virtual const TERM_T | term (TERMID_T termID) const =0 |
Convert a valid termID to its spelling. | |
virtual DOCID_T | document (const EXDOCID_T &docIDStr) const =0 |
Convert a spelling to docID, returns 0 if out of vocabulary. Valid index starts at 1. | |
virtual const EXDOCID_T | document (DOCID_T docID) const =0 |
Convert a valid docID to its spelling. | |
virtual const DocumentManager * | docManager (DOCID_T docID) const |
virtual const int | field (std::string fieldName) const |
Convert a field name to a field ID (for those index types that support fields). | |
virtual const int | field (const char *fieldName) const |
Convert a field name to a field ID (for those index types that support fields). | |
virtual const std::string | field (int fieldID) const |
Convert a field ID to a field name (for those index types that support fields). | |
virtual const string | termLexiconID () const |
Return a string ID for the term lexicon (usually the file name of the lexicon). | |
Summary counts | |
virtual COUNT_T | docCount () const =0 |
Total count (i.e., number) of documents in collection. | |
virtual COUNT_T | termCountUnique () const =0 |
Total count of unique terms in collection, i.e., the term vocabulary size. | |
virtual COUNT_T | termCount (TERMID_T termID) const =0 |
Total counts of a term in collection. | |
virtual COUNT_T | termCount () const =0 |
Total counts of all terms in collection. | |
virtual float | docLengthAvg () const =0 |
Average document length. | |
virtual COUNT_T | docCount (TERMID_T termID) const =0 |
Total count of documents that contain a given term. | |
virtual COUNT_T | docLength (DOCID_T docID) const =0 |
Total counts of terms in a document. | |
Index entry access | |
virtual DocInfoList * | docInfoList (TERMID_T termID) const =0 |
Returns a new instance of DocInfoList, which represents the doc entries in a term index; you must delete the instance later. See also: DocInfoList. | |
virtual TermInfoList * | termInfoList (DOCID_T docID) const =0 |
Returns a new instance of TermInfoList, which represents the word entries in a document index; you must delete the instance later. See also: TermInfoList. | |
virtual FieldInfoList * | fieldInfoList (DOCID_T docID) const |
virtual FieldInfoList * | fieldInfoList (DOCID_T docID, int fieldID) const |
document metadata access | |
virtual std::vector< std::string > | documentMetadata (const std::vector< lemur::api::DOCID_T > &documentIDs, const std::string &attributeName) |
Fetch the named metadata attribute for a list of document ids. | |
virtual std::vector< std::string > | documentMetadata (lemur::api::DOCID_T documentID, const std::string &attributeName) |
Fetch the named metadata attribute for a single document id. |
This is an abstract class that provides a uniform interface for access to an indexed document collection. The following is an example of using it.
Index &myIndex;
myIndex.open("index-file");
int t1; ...
// now fetch doc info list for term t1
// this returns a dynamic instance, so you'll need to delete it
DocInfoList *docList = myIndex.docInfoList(t1);
docList->startIteration();
DocInfo *entry;
while (docList->hasMore()) {
  entry = docList->nextEntry();
  // this returns a pointer to *static* memory, so don't delete entry!
  cout << "entry doc id: " << entry->docID() << endl;
  cout << "entry term count: " << entry->termCount() << endl;
}
delete docList;
Constructor & Destructor Documentation
|
|
|
Returns whatever collection properties might have been passed in during indexing with a call to PushIndex::endCollection(CollectionProps). Reimplemented in lemur::index::KeyfileIncIndex. |
|
Total count of documents that contain a given term.
Implemented in lemur::index::KeyfileIncIndex, and lemur::index::LemurIndriIndex. |
|
Total count (i.e., number) of documents in collection.
Implemented in lemur::index::KeyfileIncIndex, and lemur::index::LemurIndriIndex. |
|
Returns a new instance of DocInfoList, which represents the doc entries in a term index; you must delete the instance later. See also: DocInfoList.
Implemented in lemur::index::KeyfileIncIndex, and lemur::index::LemurIndriIndex. |
|
Total counts of terms in a document.
Implemented in lemur::index::KeyfileIncIndex, and lemur::index::LemurIndriIndex. |
|
Average document length.
Implemented in lemur::index::KeyfileIncIndex, and lemur::index::LemurIndriIndex. |
|
A string identifier for the document manager, used to get at the source of the document with this document id. Reimplemented in lemur::index::KeyfileIncIndex, and lemur::index::LemurIndriIndex. |
|
Convert a valid docID to its spelling.
Implemented in lemur::index::KeyfileIncIndex, and lemur::index::LemurIndriIndex. |
|
Convert a spelling to docID, returns 0 if out of vocabulary. Valid index starts at 1.
|
|
Fetch the named metadata attribute for a single document id.
Reimplemented in lemur::index::LemurIndriIndex. |
|
Fetch the named metadata attribute for a list of document ids.
Reimplemented in lemur::index::LemurIndriIndex. |
|
Convert a field ID to a field name (for those index types that support fields).
Reimplemented in lemur::index::LemurIndriIndex. |
|
Convert a field name to a field ID (for those index types that support fields).
Reimplemented in lemur::index::LemurIndriIndex. |
|
Convert a field name to a field ID (for those index types that support fields).
Reimplemented in lemur::index::LemurIndriIndex. |
|
Returns a new instance of FieldInfoList, which represents the field entities in a document index for a specific field; you must delete the instance later.
Reimplemented in lemur::index::LemurIndriIndex. |
|
Returns a new instance of FieldInfoList, which represents all field entities in a document index; you must delete the instance later.
Reimplemented in lemur::index::LemurIndriIndex. |
|
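Open a previously created Index; returns true if opened successfully. indexName should be the full name of the table-of-contents file for the index, e.g., "index.bsc" for an index built with the basic indexer.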
Implemented in lemur::index::KeyfileIncIndex. |
|
Convert a valid termID to its spelling.
Implemented in lemur::index::KeyfileIncIndex, and lemur::index::LemurIndriIndex. |
|
Convert a term spelling to a termID, returns 0 if out of vocabulary. Valid index starts at 1.
|
|
Total counts of all terms in collection.
Implemented in lemur::index::KeyfileIncIndex, and lemur::index::LemurIndriIndex. |
|
Total counts of a term in collection.
Implemented in lemur::index::KeyfileIncIndex, and lemur::index::LemurIndriIndex. |
|
Total count of unique terms in collection, i.e., the term vocabulary size.
Implemented in lemur::index::KeyfileIncIndex, and lemur::index::LemurIndriIndex. |
|
Returns a new instance of TermInfoList, which represents the word entries in a document index; you must delete the instance later. See also: TermInfoList.
Implemented in lemur::index::KeyfileIncIndex, and lemur::index::LemurIndriIndex. |
|
Reimplemented in lemur::index::KeyfileIncIndex, and lemur::index::LemurIndriIndex. |
|
Return a string ID for the term lexicon (usually the file name of the lexicon). This function should be pure virtual; the default implementation is just for convenience. Appropriate implementation to be done in the future. |