Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

TextHandler.hpp

Go to the documentation of this file.
00001 
00002 /*==========================================================================
00003  *
00004  *  Original source copyright (c) 2001, Carnegie Mellon University.
00005  *  See copyright.cmu for details.
00006  *  Modifications copyright (c) 2002, University of Massachusetts.
00007  *  See copyright.umass for details.
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #include <cstring>
00013 #include "common_headers.hpp"
00014 
00015 #ifndef NULL
00016 #define NULL 0
00017 #endif
00018 
00019 #ifndef _TEXTHANDLER_HPP
00020 #define _TEXTHANDLER_HPP
00021 
00022 #include "PropertyList.hpp"
00023 
00024 
00025 #define MAXWORDSIZE 1024
00026 
00045 
00046 
00052 
00053 //  Might make more sense as TextSource and TextDestination with
00054 //  functions in the middle of the chain inheriting from both.
00055 #include <cstdio>
00056 
00057 class TextHandler {
00058 
00059 public:
00060   enum TokenType {BEGINDOC = 1, ENDDOC = 2, WORDTOK = 3, 
00061                   BEGINTAG = 4, ENDTAG = 5, SYMBOLTOK = 6};
00062   static const string category;
00063   static const string identifier;
00064 
00065   TextHandler() {
00066     textHandler = NULL;
00067     buffer[MAXWORDSIZE-1] = '\0';
00068     cat = category;
00069     iden = identifier;
00070   }
00071   virtual ~TextHandler() {}
00072   
00074   virtual void setTextHandler(TextHandler * th) {
00075     textHandler = th;
00076   }
00078   virtual TextHandler * getTextHandler() {
00079     return textHandler;
00080   }
00081 
00082   virtual void foundToken(TokenType type, 
00083                           char * token = NULL, 
00084                           const char * orig = NULL,
00085                           PropertyList * properties = NULL) {
00086     char * t = NULL;
00087 
00088     if (token != NULL) {
00089       strncpy(buffer, token, MAXWORDSIZE - 1);
00090       t = buffer;
00091     } 
00092 
00093     switch (type) {
00094 
00095     case BEGINDOC:
00096       t = handleBeginDoc(t, orig, properties);
00097       break;
00098     case ENDDOC:
00099       t = handleEndDoc(t, orig, properties);
00100       break;
00101     case WORDTOK:
00102       t = handleWord(t, orig, properties);
00103       break;
00104     case BEGINTAG:
00105       t = handleBeginTag(t, orig, properties);
00106       break;
00107     case ENDTAG:
00108       t = handleEndTag(t, orig, properties);
00109       break;            
00110     case SYMBOLTOK:
00111       t = handleSymbol(t, orig, properties);
00112       break;            
00113     }
00114 
00115     if (textHandler != NULL) {
00116       textHandler->foundToken(type, t, orig, properties);
00117     }
00118   }
00119 
00122   virtual char * handleBeginDoc(char * docno, const char * original,
00123                                 PropertyList * list) {
00124     return handleDoc(docno);
00125   }
00128   virtual char * handleEndDoc(char * token, const char * original,
00129                               PropertyList * list) {
00130     handleEndDoc();
00131     return token;
00132   }
00135   virtual char * handleWord(char * word, const char * original,
00136                             PropertyList * list) {
00137     return handleWord(word);
00138   }
00140   virtual char * handleBeginTag(char * tag, const char * original,
00141                                 PropertyList * list) {
00142     return tag;
00143   }
00145   virtual char * handleEndTag(char * tag, const char * original,
00146                               PropertyList * list) {
00147     return tag;
00148   }
00149 
00152   virtual char * handleSymbol(char * symbol, const char * original,
00153                               PropertyList * list) {
00154     return handleSymbol(symbol);
00155   }
00156 
00157 
00158 
00159   // For backwards compatability
00161   virtual void foundDoc(char * docno) {
00162     foundToken(BEGINDOC, docno, docno);
00163   }
00164   virtual void foundDoc(char * docno, const char * original) {
00165     foundToken(BEGINDOC, docno, original);
00166   }
00168   virtual void foundWord(char * word) {
00169     foundToken(WORDTOK, word, word);
00170   }
00171   virtual void foundWord(char * word, const char * original) {
00172     foundToken(WORDTOK, word, original);
00173   }
00175   virtual void foundEndDoc() {
00176     foundToken(ENDDOC);
00177   }
00179   virtual void foundSymbol(char * sym) {
00180     foundToken(SYMBOLTOK, sym, sym);
00181   }  
00182   // Kept for backwords compatability
00184   virtual char * handleDoc(char * docno) { return docno; }
00186   virtual char * handleWord(char * word) { return word; }
00188   virtual void handleEndDoc() { }
00190   virtual char * handleSymbol(char * sym) { return sym; }
00191 
00193   virtual string getCategory() { return cat; }
00195   virtual string getIdentifier() { return iden; }
00196 protected:
00198   TextHandler * textHandler;
00199   string cat;
00200   string iden;
00201 
00202   char buffer[MAXWORDSIZE];
00203 };
00204 
00205 
00206 #endif
00207 

Generated on Wed Nov 3 12:59:05 2004 for Lemur Toolkit by doxygen 1.2.18