Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

TextHandler.hpp

Go to the documentation of this file.
00001 
00002 /*==========================================================================
00003  *
00004  *  Original source copyright (c) 2001, Carnegie Mellon University.
00005  *  See copyright.cmu for details.
00006  *  Modifications copyright (c) 2002, University of Massachusetts.
00007  *  See copyright.umass for details.
00008  *
00009  *==========================================================================
00010  */
00011 
00012 #include <cstring>
00013 #include "common_headers.hpp"
00014 
00015 #ifndef NULL
00016 #define NULL 0
00017 #endif
00018 
00019 #ifndef _TEXTHANDLER_HPP
00020 #define _TEXTHANDLER_HPP
00021 #include <cstdio>
00022 #include "PropertyList.hpp"
00023 #include "Exception.hpp"
00024 
00025 #define MAXWORDSIZE 1024
00026 
00027 namespace lemur 
00028 {
00029   namespace api
00030   {
00031     
00033 
00063     class TextHandler {
00064 
00065     public:
00066       enum TokenType {BEGINDOC = 1, ENDDOC = 2, WORDTOK = 3, 
00067                       BEGINTAG = 4, ENDTAG = 5, SYMBOLTOK = 6};
00068       static const string category;
00069       static const string identifier;
00070 
00071       TextHandler() {
00072         textHandler = NULL;
00073         prevHandler = NULL;
00074         buffer[MAXWORDSIZE-1] = '\0';
00075         cat = category;
00076         iden = identifier;
00077       }
00078       virtual ~TextHandler() {
00079         if (textHandler)
00080           textHandler->destroyPrevHandler();
00081         if (prevHandler)
00082           prevHandler->destroyTextHandler();
00083       }
00084   
00086       virtual void setTextHandler(TextHandler * th) {
00087         textHandler = th;
00088         textHandler->setPrevHandler(this);
00089       }
00090 
00092       virtual TextHandler * getTextHandler() {
00093         return textHandler;
00094       }
00095 
00097       virtual TextHandler * getPrevHandler() {
00098         return prevHandler;
00099       }
00100 
00101       virtual void foundToken(TokenType type, 
00102                               const char * token = NULL, 
00103                               const char * orig = NULL,
00104                               lemur::parse::PropertyList * properties = NULL) {
00105         char * t = NULL;
00106 
00107         if (token != NULL) {
00108           strncpy(buffer, token, MAXWORDSIZE - 1);
00109           t = buffer;
00110         } 
00111 
00112         switch (type) {
00113 
00114         case BEGINDOC:
00115           t = handleBeginDoc(t, orig, properties);
00116           break;
00117         case ENDDOC:
00118           t = handleEndDoc(t, orig, properties);
00119           break;
00120         case WORDTOK:
00121           t = handleWord(t, orig, properties);
00122           break;
00123         case BEGINTAG:
00124           t = handleBeginTag(t, orig, properties);
00125           break;
00126         case ENDTAG:
00127           t = handleEndTag(t, orig, properties);
00128           break;            
00129         case SYMBOLTOK:
00130           t = handleSymbol(t, orig, properties);
00131           break;            
00132         }
00133 
00134         if (textHandler != NULL) {
00135           textHandler->foundToken(type, t, orig, properties);
00136         }
00137       }
00138 
00141       virtual char * handleBeginDoc(char * docno, const char * original,
00142                                     lemur::parse::PropertyList * list) {
00143         return handleDoc(docno);
00144       }
00147       virtual char * handleEndDoc(char * token, const char * original,
00148                                   lemur::parse::PropertyList * list) {
00149         handleEndDoc();
00150         return token;
00151       }
00154       virtual char * handleWord(char * word, const char * original,
00155                                 lemur::parse::PropertyList * list) {
00156         return handleWord(word);
00157       }
00159       virtual char * handleBeginTag(char * tag, const char * original,
00160                                     lemur::parse::PropertyList * list) {
00161         return tag;
00162       }
00164       virtual char * handleEndTag(char * tag, const char * original,
00165                                   lemur::parse::PropertyList * list) {
00166         return tag;
00167       }
00168 
00171       virtual char * handleSymbol(char * symbol, const char * original,
00172                                   lemur::parse::PropertyList * list) {
00173         return handleSymbol(symbol);
00174       }
00175 
00176 
00177 
00178       // For backwards compatability
00180       virtual void foundDoc(char * docno) {
00181         foundToken(BEGINDOC, docno, docno);
00182       }
00183       virtual void foundDoc(char * docno, const char * original) {
00184         foundToken(BEGINDOC, docno, original);
00185       }
00187       virtual void foundWord(char * word) {
00188         foundToken(WORDTOK, word, word);
00189       }
00190       virtual void foundWord(char * word, const char * original) {
00191         foundToken(WORDTOK, word, original);
00192       }
00194       virtual void foundEndDoc() {
00195         foundToken(ENDDOC);
00196       }
00198       virtual void foundSymbol(const char * sym) {
00199         foundToken(SYMBOLTOK, sym, sym);
00200       }  
00201       // Kept for backwords compatability
00203       virtual char * handleDoc(char * docno) { return docno; }
00205       virtual char * handleWord(char * word) { return word; }
00207       virtual void handleEndDoc() { }
00209       virtual char * handleSymbol(char * sym) { return sym; }
00210 
00212       virtual string getCategory() const { return cat; }
00214       virtual string getIdentifier() const { return iden; }
00216       virtual void writePropertyList(lemur::parse::PropertyList* list) const{
00217         if (!list) {
00218           LEMUR_THROW(LEMUR_INTERNAL_ERROR, cat + " unable to save properties list");
00219           return;
00220         }
00221         lemur::parse::Property prop(cat);
00222         prop.setValue(iden);
00223         list->setProperty(&prop);
00224       }
00225 
00226     protected:
00228       virtual void setPrevHandler(TextHandler * th) {
00229         prevHandler = th;
00230       }
00231 
00233       virtual void destroyPrevHandler() {
00234         if (prevHandler)
00235           prevHandler = prevHandler->getPrevHandler();
00236       }
00237 
00239       virtual void destroyTextHandler() {
00240         if (textHandler)
00241           textHandler = textHandler->getTextHandler();
00242       }
00243 
00245       TextHandler * textHandler;
00247       TextHandler * prevHandler;
00248       string cat;
00249       string iden;
00250 
00251       char buffer[MAXWORDSIZE];
00252     };
00253   }
00254 }
00255 #endif

Generated on Tue Jun 15 11:02:56 2010 for Lemur by doxygen 1.3.4