00001 /*========================================================================== 00002 * Copyright (c) 2003-2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // 00014 // ParsedDocument 00015 // 00016 // 12 May 2004 -- tds 00017 // 00018 00019 #ifndef INDRI_PARSEDDOCUMENT_HPP 00020 #define INDRI_PARSEDDOCUMENT_HPP 00021 00022 #include "indri/greedy_vector" 00023 #include "indri/TagExtent.hpp" 00024 #include "indri/TermExtent.hpp" 00025 #include "indri/MetadataPair.hpp" 00026 #include <string> 00027 namespace indri 00028 { 00029 namespace api 00030 { 00031 00032 struct ParsedDocument { 00033 const char* text; 00034 size_t textLength; 00035 00036 const char* content; 00037 size_t contentLength; 00038 00039 std::string getContent() { 00040 return std::string (content, contentLength); 00041 } 00042 00043 indri::utility::greedy_vector<char*> terms; 00044 indri::utility::greedy_vector<indri::parse::TagExtent *> tags; 00045 indri::utility::greedy_vector<indri::parse::TermExtent> positions; 00046 indri::utility::greedy_vector<indri::parse::MetadataPair> metadata; 00047 }; 00048 } 00049 } 00050 00051 #endif // INDRI_PARSEDDOCUMENT_HPP 00052