00001 /*========================================================================== 00002 * Copyright (c) 2003 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // 00014 // KeyfileTextHandler.hpp 00015 // 00016 00017 #ifndef LEMUR_KEYFILE_TEXT_HANDLER_HPP 00018 #define LEMUR_KEYFILE_TEXT_HANDLER_HPP 00019 00020 #include "TextHandler.hpp" 00021 #include "DocumentProps.hpp" 00022 namespace lemur 00023 { 00024 namespace index 00025 { 00026 class KeyfileIncIndex; 00027 } 00028 } 00029 namespace lemur 00030 { 00031 namespace parse 00032 { 00033 00036 class KeyfileTextHandler : public lemur::api::TextHandler { 00037 public: 00039 KeyfileTextHandler( class lemur::index::KeyfileIncIndex* index, bool countStops=false); 00040 ~KeyfileTextHandler(); 00042 char * handleDoc(char * docno); 00044 char * handleWord(char * word); 00046 void setDocManager(const string &mgrID); 00047 00048 private: 00049 void endDoc(); 00050 void endCollection(); 00051 00052 int docLength; 00053 DocumentProps* dp; 00054 bool first; 00055 int pos; 00056 bool countStopWds; 00057 class lemur::index::KeyfileIncIndex* _index; 00058 }; 00059 } 00060 } 00061 00062 #endif // LEMUR_KEYFILE_TEXT_HANDLER_HPP