|
Public Member Functions |
| IndexWriter () |
void | write (indri::index::Index &index, std::vector< indri::index::Index::FieldDescription > &fields, indri::index::DeletedDocumentList &deletedList, const std::string &fileName) |
void | write (std::vector< indri::index::Index * > &indexes, std::vector< indri::index::Index::FieldDescription > &fields, indri::index::DeletedDocumentList &deletedList, const std::string &fileName) |
void | write (std::vector< indri::index::Index * > &indexes, std::vector< indri::index::Index::FieldDescription > &fields, std::vector< indri::index::DeletedDocumentList * > &deletedLists, const std::vector< lemur::api::DOCID_T > &documentMaximums, const std::string &path) |
Private Types |
enum | { TOPDOCS_DOCUMENT_COUNT = 1000, FREQUENT_TERM_COUNT = 1000 } |
Private Member Functions |
void | _writeManifest (const std::string &path) |
void | _writeSkip (indri::file::SequentialWriteBuffer *buffer, lemur::api::DOCID_T document, int length) |
void | _writeBatch (indri::file::SequentialWriteBuffer *buffer, lemur::api::DOCID_T document, int length, indri::utility::Buffer &data) |
void | _writeFieldLists (std::vector< WriterIndexContext * > &contexts, const std::string &path) |
void | _writeFieldList (indri::file::SequentialWriteBuffer &output, int fieldIndex, std::vector< indri::index::DocExtentListIterator * > &iterators, std::vector< WriterIndexContext * > &contexts) |
void | _pushInvertedLists (indri::utility::greedy_vector< WriterIndexContext * > &lists, invertedlist_pqueue &queue) |
void | _fetchMatchingInvertedLists (indri::utility::greedy_vector< WriterIndexContext * > &lists, invertedlist_pqueue &queue) |
void | _writeStatistics (indri::utility::greedy_vector< WriterIndexContext * > &lists, indri::index::TermData *termData, UINT64 &startOffset) |
void | _writeInvertedLists (std::vector< WriterIndexContext * > &contexts) |
void | _storeIdEntry (IndexWriter::keyfile_pair &pair, indri::index::DiskTermData *diskTermData) |
void | _storeStringEntry (IndexWriter::keyfile_pair &pair, indri::index::DiskTermData *diskTermData) |
void | _storeTermEntry (IndexWriter::keyfile_pair &pair, indri::index::DiskTermData *diskTermData) |
void | _storeFrequentTerms () |
void | _addInvertedListData (indri::utility::greedy_vector< WriterIndexContext * > &lists, indri::index::TermData *termData, indri::utility::Buffer &listBuffer, UINT64 &endOffset) |
void | _storeMatchInformation (indri::utility::greedy_vector< WriterIndexContext * > &lists, int sequence, indri::index::TermData *termData, UINT64 startOffset, UINT64 endOffset) |
lemur::api::TERMID_T | _lookupTermID (indri::file::BulkTreeReader &keyfile, const char *term) |
void | _buildIndexContexts (std::vector< WriterIndexContext * > &contexts, std::vector< indri::index::Index * > &indexes, indri::index::DeletedDocumentList &deletedList) |
void | _buildIndexContexts (std::vector< WriterIndexContext * > &contexts, std::vector< indri::index::Index * > &indexes, std::vector< indri::index::DeletedDocumentList * > &deletedLists, const std::vector< lemur::api::DOCID_T > &documentOffsets) |
void | _writeDirectLists (std::vector< WriterIndexContext * > &contexts) |
void | _writeDirectLists (WriterIndexContext *context, indri::file::SequentialWriteBuffer *directOutput, indri::file::SequentialWriteBuffer *lengthsOutput, indri::file::SequentialWriteBuffer *dataOutput) |
void | _constructFiles (const std::string &path) |
void | _closeFiles (const std::string &path) |
void | _openTermsReaders (const std::string &path) |
indri::index::TermTranslator * | _buildTermTranslator (indri::file::BulkTreeReader &newInfrequentTerms, indri::file::BulkTreeReader &newFrequentTerms, indri::index::TermRecorder &oldFrequentTermsRecorder, indri::utility::HashTable< lemur::api::TERMID_T, lemur::api::TERMID_T > *oldInfrequent, indri::index::TermRecorder &newFrequentTermsRecorder, indri::index::Index *index, indri::index::TermBitmap *bitmap) |
Private Attributes |
keyfile_pair | _infrequentTerms |
keyfile_pair | _frequentTerms |
indri::file::File | _frequentTermsData |
indri::file::BulkTreeReader | _infrequentTermsReader |
indri::file::BulkTreeReader | _frequentTermsReader |
indri::file::File | _documentStatistics |
indri::file::File | _documentLengths |
indri::file::File | _invertedFile |
indri::file::File | _directFile |
indri::file::File | _fieldsFile |
indri::file::SequentialWriteBuffer * | _invertedOutput |
indri::utility::greedy_vector< indri::index::DiskTermData * > | _topTerms |
int | _topTermsCount |
indri::utility::Buffer | _termDataBuffer |
int | _isFrequentCount |
lemur::api::DOCID_T | _documentBase |
indri::index::CorpusStatistics | _corpus |
std::vector< indri::index::Index::FieldDescription > | _fields |
std::vector< indri::index::FieldStatistics > | _fieldData |
char * | _compressedData |
char * | _uncompressedData |
int | _dataSize |