Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

indri::parse::HTMLParser Class Reference

#include <HTMLParser.hpp>

Inheritance diagram for indri::parse::HTMLParser:

indri::parse::TaggedTextParser, indri::parse::Parser, indri::parse::ObjectHandler< TokenizedDocument >

List of all members.

Public Member Functions

 HTMLParser ()
 ~HTMLParser ()

Protected Member Functions

virtual void initialize (TokenizedDocument *tokenized, indri::api::ParsedDocument *parsed)
virtual void cleanup (TokenizedDocument *tokenized, indri::api::ParsedDocument *parsed)
virtual void handleTag (TagEvent *te)
void prepURL (char *s)
bool normalizeURL (char *s)

Protected Attributes

char url [MAX_URL_LENGTH]
char base_url [MAX_URL_LENGTH]
tag_properties * _relativeUrlTag
tag_properties * _absoluteUrlTag
tag_properties * _anchorTag
indri::utility::Buffer _urlBuffer

Constructor & Destructor Documentation

indri::parse::HTMLParser::HTMLParser ( ) [inline]
 

indri::parse::HTMLParser::~HTMLParser ( ) [inline]
 


Member Function Documentation

void indri::parse::HTMLParser::cleanup ( TokenizedDocument * tokenized,
indri::api::ParsedDocument * parsed
) [protected, virtual]
 

Reimplemented from indri::parse::TaggedTextParser.

void indri::parse::HTMLParser::handleTag ( TagEvent * te ) [protected, virtual]
 

Reimplemented from indri::parse::TaggedTextParser.

void indri::parse::HTMLParser::initialize ( TokenizedDocument * tokenized,
indri::api::ParsedDocument * parsed
) [protected, virtual]
 

Reimplemented from indri::parse::TaggedTextParser.

bool indri::parse::HTMLParser::normalizeURL ( char * s ) [protected]
 

void indri::parse::HTMLParser::prepURL ( char * s ) [protected]
 


Member Data Documentation

tag_properties* indri::parse::HTMLParser::_absoluteUrlTag [protected]
 

tag_properties* indri::parse::HTMLParser::_anchorTag [protected]
 

tag_properties* indri::parse::HTMLParser::_relativeUrlTag [protected]
 

indri::utility::Buffer indri::parse::HTMLParser::_urlBuffer [protected]
 

char indri::parse::HTMLParser::base_url[MAX_URL_LENGTH] [protected]
 

char indri::parse::HTMLParser::url[MAX_URL_LENGTH] [protected]
 


The documentation for this class was generated from the following files:
Generated on Tue Jun 15 11:03:03 2010 for Lemur by doxygen 1.3.4