Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

indri::collection::Repository Class Reference

#include <Repository.hpp>

List of all members.

Public Types

typedef std::vector< indri::index::Index * > index_vector
typedef indri::atomic::ref_ptr< index_vector > index_state

Public Member Functions

 Repository ()
 ~Repository ()
int addDocument (indri::api::ParsedDocument *document, bool inCollection=true)
void deleteDocument (int documentID)
const std::vector< Field > & fields () const
 the indexed fields for this collection

std::vector< std::string > tags () const
 the tags for this collection

std::vector< std::string > priors () const
 the named priors list for this collection

std::string processTerm (const std::string &term)
CompressedCollection * collection ()
 the compressed document collection

void create (const std::string &path, indri::api::Parameters *options=0)
void open (const std::string &path, indri::api::Parameters *options=0)
void openRead (const std::string &path, indri::api::Parameters *options=0)
void close ()
 Close the repository.

void compact ()
index_state indexes ()
 Indexes in this repository.

indri::collection::PriorListIterator * priorListIterator (const std::string &priorName)
 Return a prior iterator.

void countQuery ()
 Notify the repository that a query has happened.

void write ()
 Write the most recent state out to disk.

void merge ()
 Merge all internal indexes together.

indri::index::DeletedDocumentList & deletedList ()
 List of deleted documents in this repository.

Load queryLoad ()
 Returns the average number of queries run each minute in the last 1, 5 and 15 minutes.

Load documentLoad ()
 Returns the average number of documents added each minute in the last 1, 5 and 15 minutes.


Static Public Member Functions

bool exists (const std::string &path)
void makeEmpty (const std::string &path)
 Make an empty repository directory on disk.

void merge (const std::string &outputIndex, const std::vector< std::string > &inputIndexes)
 Merge two or more repositories together.


Private Types

enum  { LOAD_MINUTES = 15, LOAD_MINUTE_FRACTION = 12 }

Private Member Functions

void _writeParameters (const std::string &path)
void _checkpoint ()
void _incrementLoad ()
void _countDocumentAdd ()
Load _computeLoad (indri::atomic::value_type *loadArray)
void _openPriors (const std::string &path)
void _closePriors ()
void _buildFields ()
void _buildChain (indri::api::Parameters &parameters, indri::api::Parameters *options)
void _copyParameters (indri::api::Parameters &options)
void _removeStates (std::vector< index_state > &toRemove)
void _remove (const std::string &path)
void _openIndexes (indri::api::Parameters &params, const std::string &parentPath)
std::vector< index_state > _statesContaining (std::vector< indri::index::Index * > &indexes)
bool _stateContains (index_state &state, std::vector< indri::index::Index * > &indexes)
void _swapState (std::vector< indri::index::Index * > &oldIndexes, indri::index::Index *newIndex)
void _closeIndexes ()
void _merge (index_state &state)
indri::index::Index * _mergeStage (index_state &state)
UINT64 _mergeMemory (const std::vector< indri::index::Index * > &indexes)
unsigned int _mergeFiles (const std::vector< indri::index::Index * > &indexes)
void _merge ()
 merge all known indexes together

void _write ()
 write the active index to disk

void _trim ()
 merge together some of the more recent indexes

void _startThreads ()
void _stopThreads ()
void _setThrashing (bool flag)
UINT64 _timeSinceThrashing ()
void _addMemoryIndex ()

Static Private Member Functions

std::vector< std::string > _fieldNames (indri::api::Parameters &parameters)
std::string _stemmerName (indri::api::Parameters &parameters)
void _mergeClosedIndexes (const std::string &outputPath, const std::vector< std::string > &repositories, const std::vector< indri::collection::Repository::Field > &indexFields, const std::vector< lemur::api::DOCID_T > &documentMaximums)
void _writeMergedManifest (const std::string &path, indri::api::Parameters &firstManifest)
void _mergeBitmaps (const std::string &outputPath, const std::vector< std::string > &repositories, const std::vector< lemur::api::DOCID_T > &documentCounts)
void _mergeCompressedCollections (const std::string &outputPath, const std::vector< std::string > &repositories, const std::vector< lemur::api::DOCID_T > &documentMaximums)
void _cleanAndCreateDirectory (const std::string &path)
std::vector< indri::index::Index::FieldDescription > _fieldsForIndex (const std::vector< Repository::Field > &_fields)

Private Attributes

RepositoryMaintenanceThread * _maintenanceThread
RepositoryLoadThread * _loadThread
indri::thread::Mutex _stateLock
std::vector< index_state > _states
 protects against state changes

index_state _active
int _indexCount
volatile bool _maintenanceRunning
volatile bool _loadThreadRunning
indri::thread::Mutex _addLock
CompressedCollection * _collection
 protects addDocument

indri::index::DeletedDocumentList _deletedList
indri::api::Parameters _parameters
std::vector< indri::parse::Transformation * > _transformations
std::vector< Field > _fields
std::vector< indri::index::Index::FieldDescription > _indexFields
std::map< std::string, indri::file::File * > _priorFiles
std::string _path
bool _readOnly
INT64 _memory
UINT64 _lastThrashTime
volatile bool _thrashing
indri::atomic::value_type _queryLoad [LOAD_MINUTES *LOAD_MINUTE_FRACTION]
indri::atomic::value_type _documentLoad [LOAD_MINUTES *LOAD_MINUTE_FRACTION]

Friends

class RepositoryMaintenanceThread
class RepositoryLoadThread


Detailed Description

Encapsulates document manager, index, and field indexes. Provides access to collection for both IndexEnvironment and QueryEnvironment.


Member Typedef Documentation

typedef indri::atomic::ref_ptr<index_vector> indri::collection::Repository::index_state
 

typedef std::vector<indri::index::Index*> indri::collection::Repository::index_vector
 


Member Enumeration Documentation

anonymous enum [private]
 

Enumeration values:
LOAD_MINUTES 
LOAD_MINUTE_FRACTION 


Constructor & Destructor Documentation

indri::collection::Repository::Repository ()  [inline]
 

indri::collection::Repository::~Repository ()  [inline]
 


Member Function Documentation

void indri::collection::Repository::_addMemoryIndex ()  [private]
 

void indri::collection::Repository::_buildChain (indri::api::Parameters & parameters,
indri::api::Parameters * options)  [private]
 

void indri::collection::Repository::_buildFields ()  [private]
 

void indri::collection::Repository::_checkpoint ()  [private]
 

void indri::collection::Repository::_cleanAndCreateDirectory (const std::string & path)  [static, private]
 

void indri::collection::Repository::_closeIndexes ()  [private]
 

void indri::collection::Repository::_closePriors ()  [private]
 

indri::collection::Repository::Load indri::collection::Repository::_computeLoad (indri::atomic::value_type * loadArray)  [private]
 

void indri::collection::Repository::_copyParameters (indri::api::Parameters & options)  [private]
 

void indri::collection::Repository::_countDocumentAdd ()  [private]
 

std::vector< std::string > indri::collection::Repository::_fieldNames (indri::api::Parameters & parameters)  [static, private]
 

std::vector< indri::index::Index::FieldDescription > indri::collection::Repository::_fieldsForIndex (const std::vector< Repository::Field > & _fields)  [static, private]
 

void indri::collection::Repository::_incrementLoad ()  [private]
 

void indri::collection::Repository::_merge ()  [private]
 

merge all known indexes together

void indri::collection::Repository::_merge (index_state & state)  [private]
 

void indri::collection::Repository::_mergeBitmaps (const std::string & outputPath,
const std::vector< std::string > & repositories,
const std::vector< lemur::api::DOCID_T > & documentCounts)  [static, private]
 

void indri::collection::Repository::_mergeClosedIndexes (const std::string & outputPath,
const std::vector< std::string > & repositories,
const std::vector< indri::collection::Repository::Field > & indexFields,
const std::vector< lemur::api::DOCID_T > & documentMaximums)  [static, private]
 

void indri::collection::Repository::_mergeCompressedCollections (const std::string & outputPath,
const std::vector< std::string > & repositories,
const std::vector< lemur::api::DOCID_T > & documentMaximums)  [static, private]
 

unsigned int indri::collection::Repository::_mergeFiles (const std::vector< indri::index::Index * > & indexes)  [private]
 

UINT64 indri::collection::Repository::_mergeMemory (const std::vector< indri::index::Index * > & indexes)  [private]
 

indri::index::Index * indri::collection::Repository::_mergeStage (index_state & state)  [private]
 

void indri::collection::Repository::_openIndexes (indri::api::Parameters & params,
const std::string & parentPath)  [private]
 

void indri::collection::Repository::_openPriors (const std::string & path)  [private]
 

void indri::collection::Repository::_remove (const std::string & path)  [private]
 

void indri::collection::Repository::_removeStates (std::vector< index_state > & toRemove)  [private]
 

void indri::collection::Repository::_setThrashing (bool flag)  [private]
 

void indri::collection::Repository::_startThreads ()  [private]
 

bool indri::collection::Repository::_stateContains (index_state & state,
std::vector< indri::index::Index * > & indexes)  [private]
 

std::vector< indri::collection::Repository::index_state > indri::collection::Repository::_statesContaining (std::vector< indri::index::Index * > & indexes)  [private]
 

std::string indri::collection::Repository::_stemmerName (indri::api::Parameters & parameters)  [static, private]
 

void indri::collection::Repository::_stopThreads ()  [private]
 

void indri::collection::Repository::_swapState (std::vector< indri::index::Index * > & oldIndexes,
indri::index::Index * newIndex)  [private]
 

UINT64 indri::collection::Repository::_timeSinceThrashing ()  [private]
 

void indri::collection::Repository::_trim ()  [private]
 

merge together some of the more recent indexes

void indri::collection::Repository::_write ()  [private]
 

write the active index to disk

void indri::collection::Repository::_writeMergedManifest (const std::string & path,
indri::api::Parameters & firstManifest)  [static, private]
 

void indri::collection::Repository::_writeParameters (const std::string & path)  [private]
 

int indri::collection::Repository::addDocument (indri::api::ParsedDocument * document,
bool inCollection = true)
 

add a parsed document to the repository.

Parameters:
document the document to add.
inCollection if true, add the document to the CompressedCollection.

void indri::collection::Repository::close ()
 

Close the repository.

indri::collection::CompressedCollection * indri::collection::Repository::collection ()
 

the compressed document collection

void indri::collection::Repository::compact ()
 

Compact the repository by removing all information about deleted documents from disk.

void indri::collection::Repository::countQuery ()
 

Notify the repository that a query has happened.

void indri::collection::Repository::create (const std::string & path,
indri::api::Parameters * options = 0)
 

Create a new empty repository.

Parameters:
path the directory to create the repository in
options additional parameters

indri::index::DeletedDocumentList & indri::collection::Repository::deletedList ()
 

List of deleted documents in this repository.

void indri::collection::Repository::deleteDocument (int documentID)
 

delete a document from the repository

Parameters:
documentID the internal ID of the document to delete

indri::collection::Repository::Load indri::collection::Repository::documentLoad ()
 

Returns the average number of documents added each minute in the last 1, 5 and 15 minutes.

bool indri::collection::Repository::exists (const std::string & path)  [static]
 

Returns:
true if a valid Indri Repository resides in the named path, false otherwise.
Parameters:
path the directory to open the repository from

const std::vector< indri::collection::Repository::Field > & indri::collection::Repository::fields () const
 

the indexed fields for this collection

indri::collection::Repository::index_state indri::collection::Repository::indexes ()
 

Indexes in this repository.

void indri::collection::Repository::makeEmpty (const std::string & path)  [static]
 

Make an empty repository directory on disk.

void indri::collection::Repository::merge (const std::string & outputIndex,
const std::vector< std::string > & inputIndexes)  [static]
 

Merge two or more repositories together.

void indri::collection::Repository::merge ()
 

Merge all internal indexes together.

void indri::collection::Repository::open (const std::string & path,
indri::api::Parameters * options = 0)
 

Open an existing repository.

Parameters:
path the directory to open the repository from
options additional parameters

void indri::collection::Repository::openRead (const std::string & path,
indri::api::Parameters * options = 0)
 

Open an existing repository in read only mode.

Parameters:
path the directory to open the repository from
options additional parameters

indri::collection::PriorListIterator * indri::collection::Repository::priorListIterator (const std::string & priorName)
 

Return a prior iterator.

std::vector< std::string > indri::collection::Repository::priors () const
 

the named priors list for this collection

std::string indri::collection::Repository::processTerm (const std::string & term)
 

Process, possibly transforming, the given term

Parameters:
term the term to process
Returns:
the processed term

indri::collection::Repository::Load indri::collection::Repository::queryLoad ()
 

Returns the average number of queries run each minute in the last 1, 5 and 15 minutes.

std::vector< std::string > indri::collection::Repository::tags () const
 

the tags for this collection

void indri::collection::Repository::write ()
 

Write the most recent state out to disk.


Friends And Related Function Documentation

friend class RepositoryLoadThread [friend]
 

friend class RepositoryMaintenanceThread [friend]
 


Member Data Documentation

index_state indri::collection::Repository::_active [private]
 

indri::thread::Mutex indri::collection::Repository::_addLock [private]
 

class CompressedCollection* indri::collection::Repository::_collection [private]
 

protects addDocument

indri::index::DeletedDocumentList indri::collection::Repository::_deletedList [private]
 

indri::atomic::value_type indri::collection::Repository::_documentLoad[ LOAD_MINUTES * LOAD_MINUTE_FRACTION ] [private]
 

std::vector<Field> indri::collection::Repository::_fields [private]
 

int indri::collection::Repository::_indexCount [private]
 

std::vector<indri::index::Index::FieldDescription> indri::collection::Repository::_indexFields [private]
 

UINT64 indri::collection::Repository::_lastThrashTime [private]
 

class RepositoryLoadThread* indri::collection::Repository::_loadThread [private]
 

volatile bool indri::collection::Repository::_loadThreadRunning [private]
 

volatile bool indri::collection::Repository::_maintenanceRunning [private]
 

class RepositoryMaintenanceThread* indri::collection::Repository::_maintenanceThread [private]
 

INT64 indri::collection::Repository::_memory [private]
 

indri::api::Parameters indri::collection::Repository::_parameters [private]
 

std::string indri::collection::Repository::_path [private]
 

std::map<std::string, indri::file::File*> indri::collection::Repository::_priorFiles [private]
 

indri::atomic::value_type indri::collection::Repository::_queryLoad[ LOAD_MINUTES * LOAD_MINUTE_FRACTION ] [private]
 

bool indri::collection::Repository::_readOnly [private]
 

indri::thread::Mutex indri::collection::Repository::_stateLock [private]
 

std::vector<index_state> indri::collection::Repository::_states [private]
 

protects against state changes

volatile bool indri::collection::Repository::_thrashing [private]
 

std::vector<indri::parse::Transformation*> indri::collection::Repository::_transformations [private]
 


The documentation for this class was generated from the following files:
Generated on Tue Jun 15 11:03:00 2010 for Lemur by doxygen 1.3.4