|
Classes |
struct | extension_conflations |
struct | file_class_environment_spec |
Functions |
std::string | _canonicalExtension (const std::string &name) |
void | copy_strings_to_vector (std::vector< std::string > &vec, const char **array) |
void | copy_string_tuples_to_map (std::map< indri::parse::ConflationPattern *, std::string > &m, const char **array) |
void | cleanup_conflations_map (std::map< indri::parse::ConflationPattern *, std::string > &conflations) |
indri::parse::FileClassEnvironment * | build_file_class_environment (const file_class_environment_spec *spec) |
indri::parse::FileClassEnvironment * | build_file_class_environment (const indri::parse::FileClassEnvironmentFactory::Specification *spec) |
Variables |
const char * | pdf_index_tags [] = { "title", "author", 0 } |
const char * | pdf_metadata_tags [] = { "title", "author", 0 } |
const char * | html_index_tags [] = { "title", "author", "h1", "h2", "h3", "h4", 0 } |
const char * | html_metadata_tags [] = { "title", "author", 0 } |
const char * | html_conflations [] = { "h1", NULL, NULL, "heading", "h2", NULL, NULL, "heading", "h3", NULL, NULL, "heading", "h4", NULL, NULL, "heading", 0, 0, 0, 0 } |
const char * | trec_include_tags [] = { "text", "hl", "head", "headline", "title", "ttl", "dd", "date", "date_time", "lp", "leadpara", 0 } |
const char * | trecalt_include_tags [] = { "text", 0 } |
const char * | trecalt_index_tags [] = { "text", 0 } |
const char * | trec_metadata_tags [] = { "docno", "title", "author", 0 } |
const char * | trec_conflations [] = { "hl", NULL, NULL, "headline", "head", NULL, NULL, "headline", "ttl", NULL, NULL, "title", "dd", NULL, NULL, "date", "date_time", NULL, NULL, "date", 0, 0, 0, 0 } |
const char * | trec_index_tags [] = { "author", "hl", "head", "headline", "title", "ttl", "dd", "date_time", "date", 0 } |
const char * | html_exclude_tags [] = { "script", "style", 0} |
const char * | _html [] = {"html", "htm", 0} |
const char * | _txt [] = {"txt", "text", 0} |
const char * | _doc [] = {"doc", "docx", 0} |
const char * | _ppt [] = {"ppt", "pptx", 0} |
extension_conflations | extensions [] |
file_class_environment_spec | environments [] |
`const char* trec_conflations[] = { "hl", NULL, NULL, "headline", "head", NULL, NULL, "headline", "ttl", NULL, NULL, "title", "dd", NULL, NULL, "date", "date_time", NULL, NULL, "date", 0, 0, 0, 0 }` `[static]`
|
|