Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

indri::index::IndexWriter Class Reference

#include <IndexWriter.hpp>

List of all members.

Public Member Functions

 IndexWriter ()
void write (indri::index::Index &index, std::vector< indri::index::Index::FieldDescription > &fields, indri::index::DeletedDocumentList &deletedList, const std::string &fileName)
void write (std::vector< indri::index::Index * > &indexes, std::vector< indri::index::Index::FieldDescription > &fields, indri::index::DeletedDocumentList &deletedList, const std::string &fileName)
void write (std::vector< indri::index::Index * > &indexes, std::vector< indri::index::Index::FieldDescription > &fields, std::vector< indri::index::DeletedDocumentList * > &deletedLists, const std::vector< lemur::api::DOCID_T > &documentMaximums, const std::string &path)

Private Types

enum  { TOPDOCS_DOCUMENT_COUNT = 1000, FREQUENT_TERM_COUNT = 1000 }

Private Member Functions

void _writeManifest (const std::string &path)
void _writeSkip (indri::file::SequentialWriteBuffer *buffer, lemur::api::DOCID_T document, int length)
void _writeBatch (indri::file::SequentialWriteBuffer *buffer, lemur::api::DOCID_T document, int length, indri::utility::Buffer &data)
void _writeFieldLists (std::vector< WriterIndexContext * > &contexts, const std::string &path)
void _writeFieldList (indri::file::SequentialWriteBuffer &output, int fieldIndex, std::vector< indri::index::DocExtentListIterator * > &iterators, std::vector< WriterIndexContext * > &contexts)
void _pushInvertedLists (indri::utility::greedy_vector< WriterIndexContext * > &lists, invertedlist_pqueue &queue)
void _fetchMatchingInvertedLists (indri::utility::greedy_vector< WriterIndexContext * > &lists, invertedlist_pqueue &queue)
void _writeStatistics (indri::utility::greedy_vector< WriterIndexContext * > &lists, indri::index::TermData *termData, UINT64 &startOffset)
void _writeInvertedLists (std::vector< WriterIndexContext * > &contexts)
void _storeIdEntry (IndexWriter::keyfile_pair &pair, indri::index::DiskTermData *diskTermData)
void _storeStringEntry (IndexWriter::keyfile_pair &pair, indri::index::DiskTermData *diskTermData)
void _storeTermEntry (IndexWriter::keyfile_pair &pair, indri::index::DiskTermData *diskTermData)
void _storeFrequentTerms ()
void _addInvertedListData (indri::utility::greedy_vector< WriterIndexContext * > &lists, indri::index::TermData *termData, indri::utility::Buffer &listBuffer, UINT64 &endOffset)
void _storeMatchInformation (indri::utility::greedy_vector< WriterIndexContext * > &lists, int sequence, indri::index::TermData *termData, UINT64 startOffset, UINT64 endOffset)
lemur::api::TERMID_T _lookupTermID (indri::file::BulkTreeReader &keyfile, const char *term)
void _buildIndexContexts (std::vector< WriterIndexContext * > &contexts, std::vector< indri::index::Index * > &indexes, indri::index::DeletedDocumentList &deletedList)
void _buildIndexContexts (std::vector< WriterIndexContext * > &contexts, std::vector< indri::index::Index * > &indexes, std::vector< indri::index::DeletedDocumentList * > &deletedLists, const std::vector< lemur::api::DOCID_T > &documentOffsets)
void _writeDirectLists (std::vector< WriterIndexContext * > &contexts)
void _writeDirectLists (WriterIndexContext *context, indri::file::SequentialWriteBuffer *directOutput, indri::file::SequentialWriteBuffer *lengthsOutput, indri::file::SequentialWriteBuffer *dataOutput)
void _constructFiles (const std::string &path)
void _closeFiles (const std::string &path)
void _openTermsReaders (const std::string &path)
indri::index::TermTranslator * _buildTermTranslator (indri::file::BulkTreeReader &newInfrequentTerms, indri::file::BulkTreeReader &newFrequentTerms, indri::index::TermRecorder &oldFrequentTermsRecorder, indri::utility::HashTable< lemur::api::TERMID_T, lemur::api::TERMID_T > *oldInfrequent, indri::index::TermRecorder &newFrequentTermsRecorder, indri::index::Index *index, indri::index::TermBitmap *bitmap)

Private Attributes

keyfile_pair _infrequentTerms
keyfile_pair _frequentTerms
indri::file::File _frequentTermsData
indri::file::BulkTreeReader _infrequentTermsReader
indri::file::BulkTreeReader _frequentTermsReader
indri::file::File _documentStatistics
indri::file::File _documentLengths
indri::file::File _invertedFile
indri::file::File _directFile
indri::file::File _fieldsFile
indri::file::SequentialWriteBuffer * _invertedOutput
indri::utility::greedy_vector< indri::index::DiskTermData * > _topTerms
int _topTermsCount
indri::utility::Buffer _termDataBuffer
int _isFrequentCount
lemur::api::DOCID_T _documentBase
indri::index::CorpusStatistics _corpus
std::vector< indri::index::Index::FieldDescription > _fields
std::vector< indri::index::FieldStatistics > _fieldData
char * _compressedData
char * _uncompressedData
int _dataSize


Member Enumeration Documentation

anonymous enum [private]
 

Enumeration values:
TOPDOCS_DOCUMENT_COUNT 
FREQUENT_TERM_COUNT 


Constructor & Destructor Documentation

IndexWriter::IndexWriter ()
 


Member Function Documentation

void IndexWriter::_addInvertedListData (indri::utility::greedy_vector< WriterIndexContext * > & lists,
indri::index::TermData * termData,
indri::utility::Buffer & listBuffer,
UINT64 & endOffset)
[private]
 

void IndexWriter::_buildIndexContexts (std::vector< WriterIndexContext * > & contexts,
std::vector< indri::index::Index * > & indexes,
std::vector< indri::index::DeletedDocumentList * > & deletedLists,
const std::vector< lemur::api::DOCID_T > & documentOffsets)
[private]
 

void IndexWriter::_buildIndexContexts (std::vector< WriterIndexContext * > & contexts,
std::vector< indri::index::Index * > & indexes,
indri::index::DeletedDocumentList & deletedList)
[private]
 

indri::index::TermTranslator * IndexWriter::_buildTermTranslator (indri::file::BulkTreeReader & newInfrequentTerms,
indri::file::BulkTreeReader & newFrequentTerms,
indri::index::TermRecorder & oldFrequentTermsRecorder,
indri::utility::HashTable< lemur::api::TERMID_T, lemur::api::TERMID_T > * oldInfrequent,
indri::index::TermRecorder & newFrequentTermsRecorder,
indri::index::Index * index,
indri::index::TermBitmap * bitmap)
[private]
 

void IndexWriter::_closeFiles (const std::string & path) [private]
 

void IndexWriter::_constructFiles (const std::string & path) [private]
 

void IndexWriter::_fetchMatchingInvertedLists (indri::utility::greedy_vector< WriterIndexContext * > & lists,
invertedlist_pqueue & queue)
[private]
 

lemur::api::TERMID_T IndexWriter::_lookupTermID (indri::file::BulkTreeReader & keyfile,
const char * term)
[private]
 

void IndexWriter::_openTermsReaders (const std::string & path) [private]
 

void IndexWriter::_pushInvertedLists (indri::utility::greedy_vector< WriterIndexContext * > & lists,
invertedlist_pqueue & queue)
[private]
 

void IndexWriter::_storeFrequentTerms () [private]
 

void IndexWriter::_storeIdEntry (IndexWriter::keyfile_pair & pair,
indri::index::DiskTermData * diskTermData)
[private]
 

void IndexWriter::_storeMatchInformation (indri::utility::greedy_vector< WriterIndexContext * > & lists,
int sequence,
indri::index::TermData * termData,
UINT64 startOffset,
UINT64 endOffset)
[private]
 

void IndexWriter::_storeStringEntry (IndexWriter::keyfile_pair & pair,
indri::index::DiskTermData * diskTermData)
[private]
 

void IndexWriter::_storeTermEntry (IndexWriter::keyfile_pair & pair,
indri::index::DiskTermData * diskTermData)
[private]
 

void IndexWriter::_writeBatch (indri::file::SequentialWriteBuffer * buffer,
lemur::api::DOCID_T document,
int length,
indri::utility::Buffer & data)
[private]
 

void IndexWriter::_writeDirectLists (WriterIndexContext * context,
indri::file::SequentialWriteBuffer * directOutput,
indri::file::SequentialWriteBuffer * lengthsOutput,
indri::file::SequentialWriteBuffer * dataOutput)
[private]
 

void IndexWriter::_writeDirectLists (std::vector< WriterIndexContext * > & contexts) [private]
 

void IndexWriter::_writeFieldList (indri::file::SequentialWriteBuffer & output,
int fieldIndex,
std::vector< indri::index::DocExtentListIterator * > & iterators,
std::vector< WriterIndexContext * > & contexts)
[private]
 

void IndexWriter::_writeFieldLists (std::vector< WriterIndexContext * > & contexts,
const std::string & path)
[private]
 

void IndexWriter::_writeInvertedLists (std::vector< WriterIndexContext * > & contexts) [private]
 

void IndexWriter::_writeManifest (const std::string & path) [private]
 

void IndexWriter::_writeSkip (indri::file::SequentialWriteBuffer * buffer,
lemur::api::DOCID_T document,
int length)
[private]
 

void IndexWriter::_writeStatistics (indri::utility::greedy_vector< WriterIndexContext * > & lists,
indri::index::TermData * termData,
UINT64 & startOffset)
[private]
 

void IndexWriter::write (std::vector< indri::index::Index * > & indexes,
std::vector< indri::index::Index::FieldDescription > & fields,
std::vector< indri::index::DeletedDocumentList * > & deletedLists,
const std::vector< lemur::api::DOCID_T > & documentMaximums,
const std::string & path)
 

void IndexWriter::write (std::vector< indri::index::Index * > & indexes,
std::vector< indri::index::Index::FieldDescription > & fields,
indri::index::DeletedDocumentList & deletedList,
const std::string & fileName)
 

void IndexWriter::write (indri::index::Index & index,
std::vector< indri::index::Index::FieldDescription > & fields,
indri::index::DeletedDocumentList & deletedList,
const std::string & fileName)
 


Member Data Documentation

char* indri::index::IndexWriter::_compressedData [private]
 

indri::index::CorpusStatistics indri::index::IndexWriter::_corpus [private]
 

int indri::index::IndexWriter::_dataSize [private]
 

indri::file::File indri::index::IndexWriter::_directFile [private]
 

lemur::api::DOCID_T indri::index::IndexWriter::_documentBase [private]
 

indri::file::File indri::index::IndexWriter::_documentLengths [private]
 

indri::file::File indri::index::IndexWriter::_documentStatistics [private]
 

std::vector<indri::index::FieldStatistics> indri::index::IndexWriter::_fieldData [private]
 

std::vector<indri::index::Index::FieldDescription> indri::index::IndexWriter::_fields [private]
 

indri::file::File indri::index::IndexWriter::_fieldsFile [private]
 

keyfile_pair indri::index::IndexWriter::_frequentTerms [private]
 

indri::file::File indri::index::IndexWriter::_frequentTermsData [private]
 

indri::file::BulkTreeReader indri::index::IndexWriter::_frequentTermsReader [private]
 

keyfile_pair indri::index::IndexWriter::_infrequentTerms [private]
 

indri::file::BulkTreeReader indri::index::IndexWriter::_infrequentTermsReader [private]
 

indri::file::File indri::index::IndexWriter::_invertedFile [private]
 

indri::file::SequentialWriteBuffer* indri::index::IndexWriter::_invertedOutput [private]
 

int indri::index::IndexWriter::_isFrequentCount [private]
 

indri::utility::Buffer indri::index::IndexWriter::_termDataBuffer [private]
 

indri::utility::greedy_vector<indri::index::DiskTermData*> indri::index::IndexWriter::_topTerms [private]
 

int indri::index::IndexWriter::_topTermsCount [private]
 

char* indri::index::IndexWriter::_uncompressedData [private]
 


The documentation for this class was generated from the following files:
Generated on Tue Jun 15 11:03:00 2010 for Lemur by doxygen 1.3.4