#include <IndexEnvironment.hpp>
Public Member Functions | |
| IndexEnvironment () | |
| ~IndexEnvironment () | |
| void | setOffsetAnnotationsPath (const std::string &offsetAnnotationsRoot) |
| void | setOffsetMetadataPath (const std::string &offsetMetadataRoot) |
| void | setAnchorTextPath (const std::string &anchorTextRoot) |
| void | setDocumentRoot (const std::string &documentRoot) |
| void | addFileClass (const std::string &name, const std::string &iterator, const std::string &parser, const std::string &tokenizer, const std::string &startDocTag, const std::string &endDocTag, const std::string &endMetadataTag, const std::vector< std::string > &include, const std::vector< std::string > &exclude, const std::vector< std::string > &index, const std::vector< std::string > &metadata, const std::map< indri::parse::ConflationPattern *, std::string > &conflations) |
| indri::parse::FileClassEnvironmentFactory::Specification * | getFileClassSpec (const std::string &name) |
| void | addFileClass (const indri::parse::FileClassEnvironmentFactory::Specification &spec) |
| void | setIndexedFields (const std::vector< std::string > &fieldNames) |
| void | setNumericField (const std::string &fieldName, bool isNumeric, const std::string &parserName="") |
| void | setOrdinalField (const std::string &fieldName, bool isOrdinal) |
| void | setParentalField (const std::string &fieldName, bool isParental) |
| void | setMetadataIndexedFields (const std::vector< std::string > &forwardFieldNames, const std::vector< std::string > &backwardFieldNames) |
| void | setStopwords (const std::vector< std::string > &stopwords) |
| void | setStemmer (const std::string &stemmer) |
| void | setMemory (UINT64 memory) |
| void | setNormalization (bool flag) |
| void | setStoreDocs (bool flag) |
| void | setOffsetAnnotationIndexHint (indri::parse::OffsetAnnotationIndexHint hintType) |
| void | create (const std::string &repositoryPath, IndexStatus *callback=0) |
| void | open (const std::string &repositoryPath, IndexStatus *callback=0) |
| void | close () |
| Close the index and repository. | |
| void | addFile (const std::string &fileName) |
| void | addFile (const std::string &fileName, const std::string &fileClass) |
| lemur::api::DOCID_T | addString (const std::string &documentString, const std::string &fileClass, const std::vector< indri::parse::MetadataPair > &metadata) |
| lemur::api::DOCID_T | addString (const std::string &documentString, const std::string &fileClass, const std::vector< indri::parse::MetadataPair > &metadata, const std::vector< indri::parse::TagExtent * > &tags) |
| lemur::api::DOCID_T | addParsedDocument (ParsedDocument *document) |
| void | deleteDocument (lemur::api::DOCID_T documentID) |
| int | documentsIndexed () |
| Returns the number of documents indexed so far in this session. | |
| int | documentsSeen () |
| void | compact () |
Static Public Member Functions | |
| void | merge (const std::string &outputIndex, const std::vector< std::string > &inputIndexes) |
Private Member Functions | |
| void | _getParsingContext (indri::parse::Parser **parser, indri::parse::Tokenizer **tokenizer, indri::parse::DocumentIterator **iterator, indri::parse::Conflater **conflater, const std::string &extension) |
| std::vector< indri::parse::Transformation * > | _createAnnotators (const std::string &fileName, const std::string &fileClass, indri::parse::Conflater **conflater) |
| ParsedDocument * | _applyAnnotators (std::vector< indri::parse::Transformation * > &annotators, ParsedDocument *parsed) |
Private Attributes | |
| IndexStatus * | _callback |
| Parameters * | _options |
| std::string | _repositoryPath |
| indri::collection::Repository | _repository |
| int | _documents |
| std::string | _error |
| std::string | _offsetAnnotationsRoot |
| std::string | _offsetMetadataRoot |
| std::string | _anchorTextRoot |
| std::string | _documentRoot |
| Parameters | _parameters |
| indri::parse::FileClassEnvironmentFactory | _fileClassFactory |
| indri::parse::AnchorTextAnnotator | _annotator |
| indri::parse::OffsetAnnotationAnnotator | _oa_annotator |
| indri::parse::OffsetMetadataAnnotator | _om_annotator |
| std::map< std::string, indri::parse::FileClassEnvironment * > | _environments |
| int | _documentsIndexed |
| int | _documentsSeen |
Friends | |
| class | QueryEnvironment |
|
|
|
|
|
|
|
||||||||||||
|
|
|
||||||||||||||||
|
|
|
||||||||||||||||||||||||
|
|
|
||||||||||||
|
Add a file of the specified file class to the index and repository.
|
|
|
Add the text in a file to the index and repository. The fileClass of this file will be chosen based on the file extension. If the file has no extension, it will be skipped. Information about indexing progress will be passed to the callback.
|
|
|
Add a file class.
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Add parsing information for a file class. Data for these parameters is passed into the FileClassEnvironmentFactory
|
|
|
Add an already parsed document to the index and repository.
|
|
||||||||||||||||||||
|
Adds a string to the index and repository. The documentString is assumed to contain the kind of text that would be found in a file of type fileClass.
|
|
||||||||||||||||
|
Adds a string to the index and repository. The documentString is assumed to contain the kind of text that would be found in a file of type fileClass.
|
|
|
Close the index and repository.
|
|
|
Permanently deletes information for documents that have been deleted from the index and reclaims used disk space. |
|
||||||||||||
|
Create a new index and repository.
|
|
|
Delete an existing document.
|
|
|
Returns the number of documents indexed so far in this session.
|
|
|
Returns the number of documents considered for indexing, which is the sum of the documents indexed and the documents skipped. |
|
|
Get a named file class.
|
|
||||||||||||
|
Merges the contents of the indexes referenced in the inputIndexes list and creates a new index called outputIndex. The final index is compacted (contains no information about deleted documents).
|
|
||||||||||||
|
Open an existing index and repository.
|
|
|
Set anchor text root path.
|
|
|
Set the document root path.
|
|
|
Set names of fields to be indexed. This call indicates to the index that information about these fields should be stored in the index so they can be used in queries. This does not affect whether or not the text in a particular field is stored in an index.
|
|
|
Set the amount of memory to use for internal structures.
|
|
||||||||||||
|
Set names of metadata fields to be indexed for fast retrieval. The forward fields are indexed in a B-Tree mapping (documentID, metadataValue). If a field is not forward indexed, the documentMetadata calls will still work, but they will be slower (the document has to be retrieved, decompressed and parsed to get the metadata back, instead of just a B-Tree lookup). The backward indexed fields store a mapping of (metadataValue, documentID). If a field is not backward indexed, the documentIDsFromMetadata and documentFromMetadata calls will not work.
|
|
|
Set normalization of case and some punctuation; default is true (normalize during indexing and at query time).
|
|
||||||||||||||||
|
Set the numeric property of a field.
|
|
|
Provides the indexer with the hint strategy to use for speed optimizations when indexing offset annotations.
|
|
|
Set offset annotations root path.
|
|
|
Set offset metadata root path.
|
|
||||||||||||
|
Set the ordinal property of a field.
|
|
||||||||||||
|
Set the parental property of a field.
|
|
|
Set the stemmer to use.
|
|
|
Set the list of stopwords.
|
|
|
Set storing of ParsedDocuments; default is true.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1.3.4