00001 /*========================================================================== 00002 * Copyright (c) 2003 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 #ifndef _IDENTIPARSER_HPP 00013 #define _IDENTIPARSER_HPP 00014 #include "Parser.hpp" 00015 #include "TextHandler.hpp" 00016 #include "LinkedPropertyList.hpp" 00017 00018 namespace lemur 00019 { 00020 namespace parse 00021 { 00022 00040 // Source code in IdentifinderParser.l 00041 00042 00043 #define BEGIN_PREFIX "B_" 00044 #define END_PREFIX "E_" 00045 // for simplicity, make both prefixes the same length 00046 #define PREFIX_LEN 2 00047 00048 class IdentifinderParser : public lemur::api::Parser { 00049 00050 public: 00051 static const string identifier; 00052 00053 IdentifinderParser(); 00054 00056 void parseFile(const string &filename); 00057 00058 void parseBuffer(char * buf, int len); 00059 00060 long fileTell() const; 00061 00062 private: 00064 void doParse(); 00065 00067 int state; 00068 00070 int poscount; 00071 00073 Property wordpos; 00074 Property tag; // entity tag 00075 Property btag; // the begin tag 00076 Property etag; // the end tag 00077 00078 00080 LinkedPropertyList proplist; 00081 }; 00082 } 00083 } 00084 00085 #endif