|
Public Member Functions |
| TaggedTextParser () |
| ~TaggedTextParser () |
void | setTags (const std::vector< std::string > &include, const std::vector< std::string > &exclude, const std::vector< std::string > &index, const std::vector< std::string > &metadata, const std::map< indri::parse::ConflationPattern *, std::string > &conflations) |
indri::api::ParsedDocument * | parse (TokenizedDocument *document) |
void | handle (TokenizedDocument *document) |
void | setHandler (ObjectHandler< indri::api::ParsedDocument > &h) |
Protected Types |
typedef indri::utility::HashTable< std::string, std::string > | StrHashTable |
Protected Member Functions |
virtual void | initialize (TokenizedDocument *document, indri::api::ParsedDocument *parsed) |
virtual void | cleanup (TokenizedDocument *document, indri::api::ParsedDocument *parsed) |
void | addTag (const char *s, const char *c, int pos) |
void | endTag (const char *s, const char *c, int pos) |
void | addMetadataTag (const char *s, const char *c, int pos) |
void | endMetadataTag (const char *s, const char *c, int pos) |
tag_properties * | _findTag (std::string name) |
tag_properties * | _buildTag (std::string name) |
virtual void | handleTag (TagEvent *te) |
Protected Attributes |
Conflater * | _p_conflater |
TagList * | tl |
TagList * | _metaList |
indri::utility::Buffer | _termBuffer |
indri::utility::HashTable< const char *, tag_properties * > | _tagTable |
const tag_properties * | _startExcludeRegion |
const tag_properties * | _startIncludeRegion |
bool | _exclude |
bool | _include |
bool | _defaultInclude |
unsigned int | token_pos |
unsigned int | tokens_excluded |
indri::api::ParsedDocument | _document |
Private Attributes |
ObjectHandler< indri::api::ParsedDocument > * | _handler |