Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

MboxDocumentIterator.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2003-2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 //
00013 // MboxDocumentIterator
00014 //
00015 // 20 May 2005 -- tds
00016 //
00017 
00018 #ifndef INDRI_MBOXDOCUMENTITERATOR_HPP
00019 #define INDRI_MBOXDOCUMENTITERATOR_HPP
00020 
00021 #include "indri/DocumentIterator.hpp"
00022 #include "indri/UnparsedDocument.hpp"
00023 #include "indri/Buffer.hpp"
00024 #include <fstream>
00025 namespace indri
00026 {
00027   namespace parse
00028   {
00029     class MboxDocumentIterator : public DocumentIterator { // DocumentIterator subclass; NOTE(review): name suggests it walks the messages of an mbox mail file -- confirm in the .cpp
00030     private:
00031       std::string _filename;              // NOTE(review): presumably the path passed to open() -- confirm
00032       UnparsedDocument _document;         // held by value; NOTE(review): presumably what nextDocument() points at, so likely overwritten on each call -- confirm
00033       indri::utility::Buffer _buffer;     // NOTE(review): presumably backing storage for document text -- confirm
00034       indri::utility::Buffer _metaBuffer; // NOTE(review): presumably backing storage for metadata strings -- confirm
00035       std::ifstream _in;                  // input file stream member; NOTE(review): presumably opened by open() and closed by close() -- confirm
00036 
00037       void _copyMetadata( const char* headerLine, int ignoreBytes, const char* tagName ); // private helper; NOTE(review): looks like it stores headerLine minus its first ignoreBytes bytes under tagName -- confirm
00038 
00039     public:
00040       void open( const std::string& filename ); // takes the file name by const reference; returns nothing, so failure reporting (if any) is not visible here
00041       UnparsedDocument* nextDocument();         // returns a pointer to an UnparsedDocument; NOTE(review): ownership and the end-of-input return value (null?) are not visible here -- confirm
00042       void close();                             // NOTE(review): presumably releases _in -- confirm
00043     };
00044   }
00045 }
00046 
00047 #endif // INDRI_MBOXDOCUMENTITERATOR_HPP
00048 

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4