Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

indri::parse::TaggedTextParser Class Reference

#include <TaggedTextParser.hpp>

Inheritance diagram for indri::parse::TaggedTextParser:

indri::parse::Parser, indri::parse::ObjectHandler< TokenizedDocument >, indri::parse::HTMLParser

List of all members.

Public Member Functions

 TaggedTextParser ()
 ~TaggedTextParser ()
void setTags (const std::vector< std::string > &include, const std::vector< std::string > &exclude, const std::vector< std::string > &index, const std::vector< std::string > &metadata, const std::map< indri::parse::ConflationPattern *, std::string > &conflations)
indri::api::ParsedDocument * parse (TokenizedDocument *document)
void handle (TokenizedDocument *document)
void setHandler (ObjectHandler< indri::api::ParsedDocument > &h)

Protected Types

typedef indri::utility::HashTable< std::string, std::string > StrHashTable

Protected Member Functions

virtual void initialize (TokenizedDocument *document, indri::api::ParsedDocument *parsed)
virtual void cleanup (TokenizedDocument *document, indri::api::ParsedDocument *parsed)
void addTag (const char *s, const char *c, int pos)
void endTag (const char *s, const char *c, int pos)
void addMetadataTag (const char *s, const char *c, int pos)
void endMetadataTag (const char *s, const char *c, int pos)
tag_properties * _findTag (std::string name)
tag_properties * _buildTag (std::string name)
virtual void handleTag (TagEvent *te)

Protected Attributes

Conflater * _p_conflater
TagList * tl
TagList * _metaList
indri::utility::Buffer _termBuffer
indri::utility::HashTable< const char *, tag_properties * > _tagTable
const tag_properties * _startExcludeRegion
const tag_properties * _startIncludeRegion
bool _exclude
bool _include
bool _defaultInclude
unsigned int token_pos
unsigned int tokens_excluded
indri::api::ParsedDocument _document

Private Attributes

ObjectHandler< indri::api::ParsedDocument > * _handler

Member Typedef Documentation

typedef indri::utility::HashTable<std::string, std::string> indri::parse::TaggedTextParser::StrHashTable [protected]
 


Constructor & Destructor Documentation

indri::parse::TaggedTextParser::TaggedTextParser ()
 

indri::parse::TaggedTextParser::~TaggedTextParser ()
 


Member Function Documentation

indri::parse::TaggedTextParser::tag_properties * indri::parse::TaggedTextParser::_buildTag (std::string name) [protected]
 

indri::parse::TaggedTextParser::tag_properties * indri::parse::TaggedTextParser::_findTag (std::string name) [protected]
 

void indri::parse::TaggedTextParser::addMetadataTag (const char * s,
const char * c,
int pos)
[inline, protected]
 

void indri::parse::TaggedTextParser::addTag (const char * s,
const char * c,
int pos)
[inline, protected]
 

void indri::parse::TaggedTextParser::cleanup (TokenizedDocument * document,
indri::api::ParsedDocument * parsed)
[protected, virtual]
 

Reimplemented in indri::parse::HTMLParser.

void indri::parse::TaggedTextParser::endMetadataTag (const char * s,
const char * c,
int pos)
[inline, protected]
 

void indri::parse::TaggedTextParser::endTag (const char * s,
const char * c,
int pos)
[inline, protected]
 

void indri::parse::TaggedTextParser::handle (TokenizedDocument * document) [virtual]
 

Implements indri::parse::Parser.

void indri::parse::TaggedTextParser::handleTag (TagEvent * te) [protected, virtual]
 

Reimplemented in indri::parse::HTMLParser.

void indri::parse::TaggedTextParser::initialize (TokenizedDocument * document,
indri::api::ParsedDocument * parsed)
[protected, virtual]
 

Reimplemented in indri::parse::HTMLParser.

indri::api::ParsedDocument * indri::parse::TaggedTextParser::parse (TokenizedDocument * document) [virtual]
 

Implements indri::parse::Parser.

void indri::parse::TaggedTextParser::setHandler (ObjectHandler< indri::api::ParsedDocument > & h) [virtual]
 

Implements indri::parse::Parser.

void indri::parse::TaggedTextParser::setTags (const std::vector< std::string > & include,
const std::vector< std::string > & exclude,
const std::vector< std::string > & index,
const std::vector< std::string > & metadata,
const std::map< indri::parse::ConflationPattern *, std::string > & conflations)
 


Member Data Documentation

bool indri::parse::TaggedTextParser::_defaultInclude [protected]
 

indri::api::ParsedDocument indri::parse::TaggedTextParser::_document [protected]
 

bool indri::parse::TaggedTextParser::_exclude [protected]
 

ObjectHandler<indri::api::ParsedDocument>* indri::parse::TaggedTextParser::_handler [private]
 

bool indri::parse::TaggedTextParser::_include [protected]
 

TagList* indri::parse::TaggedTextParser::_metaList [protected]
 

Conflater* indri::parse::TaggedTextParser::_p_conflater [protected]
 

const tag_properties* indri::parse::TaggedTextParser::_startExcludeRegion [protected]
 

const tag_properties* indri::parse::TaggedTextParser::_startIncludeRegion [protected]
 

indri::utility::HashTable<const char*,tag_properties*> indri::parse::TaggedTextParser::_tagTable [protected]
 

indri::utility::Buffer indri::parse::TaggedTextParser::_termBuffer [protected]
 

TagList* indri::parse::TaggedTextParser::tl [protected]
 

unsigned int indri::parse::TaggedTextParser::token_pos [protected]
 

unsigned int indri::parse::TaggedTextParser::tokens_excluded [protected]
 


The documentation for this class was generated from the following files:
Generated on Tue Jun 15 11:03:03 2010 for Lemur by doxygen 1.3.4