#include <KeyfileDocMgr.hpp>
Inheritance diagram for lemur::parse::KeyfileDocMgr:
Public Member Functions | |
KeyfileDocMgr () | |
default constructor | |
KeyfileDocMgr (const string &name, bool readOnly=true) | |
KeyfileDocMgr (string name, string mode, string source) | |
virtual | ~KeyfileDocMgr () |
char * | getDoc (const string &docID) const |
return the document associated with this ID | |
virtual char * | handleDoc (char *docno) |
add entry for new doc | |
virtual void | handleEndDoc () |
finish entry for current doc | |
virtual char * | handleWord (char *word) |
Add start and end byte offsets for this term to the list of offsets. | |
virtual void | setParser (lemur::api::Parser *p) |
set myparser to p | |
virtual lemur::api::Parser * | getParser () const |
returns a handle to a Parser object that can handle parsing the raw format of these documents | |
virtual void | buildMgr () |
virtual const string & | getMyID () const |
return name of this document manager, with the file extension (.bdm). | |
vector< Match > | getOffsets (const string &docID) const |
virtual bool | open (const string &manname) |
Open and load the toc file manname. | |
Protected Member Functions | |
virtual void | writeTOC () |
virtual bool | loadTOC () |
bool | loadFTFiles (const string &fn, int num) |
Protected Attributes | |
lemur::api::Parser * | myparser |
vector< Match > | offsets |
int | numdocs |
string | pm |
lemur::file::Keyfile | poslookup |
lemur::file::Keyfile | doclookup |
int | dbcache |
btl | docEntry |
char * | myDoc |
int | doclen |
string | IDname |
string | IDnameext |
vector< string > | sources |
int | numOldSources |
how many sources already processed? | |
int | fileid |
bool | ignoreDoc |
are we ignoring this document? | |
bool | _readOnly |
are we read only? |
|
default constructor
|
|
constructor (for open): name = toc file for this manager (same as getMyID) |
|
constructor (for build): name = what to name this manager; mode = type of parser to use; source = file with list of files this will manage |
|
|
|
Build the document manager tables from the files previously provided in the constructor. Implements lemur::api::DocumentManager. |
|
return the document associated with this ID
Implements lemur::api::DocumentManager. |
|
return name of this document manager, with the file extension (.bdm).
Implements lemur::api::DocumentManager. |
|
get the array of Match entries for the tokens in the document named docID. The entries are indexed by token position (as recorded in a TermInfoList object). |
|
returns a handle to a Parser object that can handle parsing the raw format of these documents
Implements lemur::api::DocumentManager. |
|
add entry for new doc
Reimplemented from lemur::api::TextHandler. |
|
finish entry for current doc
Reimplemented from lemur::api::TextHandler. |
|
Add start and end byte offsets for this term to the list of offsets.
Reimplemented from lemur::api::TextHandler. |
|
|
|
Reimplemented in lemur::parse::ElemDocMgr. |
|
Open and load the toc file manname.
Implements lemur::api::DocumentManager. Reimplemented in lemur::parse::ElemDocMgr. |
|
set myparser to p
|
|
Reimplemented in lemur::parse::ElemDocMgr. |
|
are we read only?
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
are we ignoring this document?
|
|
|
|
|
|
|
|
how many sources already processed?
|
|
|
|
|
|
|
|
|