Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

ParsedDocument.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2003-2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 //
00014 // ParsedDocument
00015 //
00016 // 12 May 2004 -- tds
00017 //
00018 
00019 #ifndef INDRI_PARSEDDOCUMENT_HPP
00020 #define INDRI_PARSEDDOCUMENT_HPP
00021 
00022 #include "indri/greedy_vector"
00023 #include "indri/TagExtent.hpp"
00024 #include "indri/TermExtent.hpp"
00025 #include "indri/MetadataPair.hpp"
00026 #include <string>
00027 namespace indri
00028 {
00029   namespace api 
00030   {
00031     /// Plain record describing one parsed document: two pointer/length text spans plus term, tag, position and metadata lists.
00032     struct ParsedDocument {
00033       const char* text;      ///< Start of the text span. NOTE(review): ownership/lifetime of the buffer is not visible here -- confirm with the producer.
00034       size_t textLength;     ///< Length paired with `text` (presumably a char count -- confirm).
00035       // NOTE(review): none of the pointer/length fields has an initializer in this struct; they must be assigned before being read.
00036       const char* content;   ///< Start of the span that getContent() copies.
00037       size_t contentLength;  ///< Number of chars getContent() copies starting at `content`.
00038       /// Returns a std::string copy of the contentLength chars starting at content. Declared const (it only reads members) so it is callable on const documents.
00039       std::string getContent() const {
00040         return std::string(content, contentLength);
00041       }
00042       // The lists below use indri's greedy_vector container; element meanings are inferred from the type names -- confirm against the parser.
00043       indri::utility::greedy_vector<char*> terms;                         ///< char* entries, presumably one per term.
00044       indri::utility::greedy_vector<indri::parse::TagExtent *> tags;      ///< Pointers to TagExtent objects; ownership not visible here.
00045       indri::utility::greedy_vector<indri::parse::TermExtent> positions;  ///< TermExtent values, presumably term locations in the text.
00046       indri::utility::greedy_vector<indri::parse::MetadataPair> metadata; ///< MetadataPair values attached to the document.
00047     };
00048   }
00049 }
00050 
00051 #endif // INDRI_PARSEDDOCUMENT_HPP
00052 

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4