00001 /*========================================================================== 00002 * Copyright (c) 2003-2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // 00014 // Parser 00015 // 00016 // 11 May 2004 -- tds 00017 // 00018 00019 #ifndef INDRI_PARSER_HPP 00020 #define INDRI_PARSER_HPP 00021 00022 #include "indri/ObjectHandler.hpp" 00023 #include "indri/TokenizedDocument.hpp" 00024 #include "indri/ParsedDocument.hpp" 00025 #include "indri/ConflationPattern.hpp" 00026 #include <map> 00027 #include <vector> 00028 00029 namespace indri { 00030 namespace parse 00031 { 00032 00033 class Parser : public ObjectHandler<TokenizedDocument> { 00034 public: 00035 virtual ~Parser() {}; 00036 00037 virtual indri::api::ParsedDocument* parse( TokenizedDocument* document ) = 0; 00038 virtual void setTags( const std::vector<std::string>& include, 00039 const std::vector<std::string>& exclude, 00040 const std::vector<std::string>& index, 00041 const std::vector<std::string>& metadata, 00042 const std::map<ConflationPattern *, std::string>& conflations ) = 0; 00043 00044 virtual void handle( TokenizedDocument* document ) = 0; 00045 virtual void setHandler( ObjectHandler<indri::api::ParsedDocument>& handler ) = 0; 00046 }; 00047 } 00048 } 00049 00050 #endif // INDRI_PARSER_HPP 00051 00052