00001 /*========================================================================== 00002 * Copyright (c) 2002-2003 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 #ifndef _DOCOFFSETPARSER_HPP 00013 #define _DOCOFFSETPARSER_HPP 00014 #include "Parser.hpp" 00015 #include "Match.hpp" 00016 namespace lemur 00017 { 00019 namespace parse 00020 { 00021 00024 class DocOffsetParser : public lemur::api::TextHandler { 00025 00026 public: 00028 DocOffsetParser(lemur::api::Parser *parser) : p(parser) { 00029 p->setTextHandler(this); 00030 } 00031 00032 virtual ~DocOffsetParser(){} 00033 00035 virtual char *handleWord(char * word) { 00036 if (word != NULL) { 00037 int end = p->fileTell() - 1; 00038 int start = (end - strlen(word)) + 1; 00039 Match m; 00040 m.start = start; 00041 m.end = end; 00042 offsets.push_back(m); 00043 } 00044 return word; 00045 } 00047 virtual void parseString(char *buffer) { 00048 offsets.clear(); 00049 p->parseBuffer(buffer, strlen(buffer)); 00050 } 00052 vector <Match> getOffsets() { return offsets; } 00053 private: 00055 vector <Match> offsets; 00056 lemur::api::Parser *p; 00057 }; 00058 } 00059 } 00060 00061 #endif