Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

CharScanner.hpp

Go to the documentation of this file.
00001 #ifndef INC_CharScanner_hpp__
00002 #define INC_CharScanner_hpp__
00003 
00004 /* ANTLR Translator Generator
00005  * Project led by Terence Parr at http://www.jGuru.com
00006  * Software rights: http://www.antlr.org/license.html
00007  *
00008  * $Id: CharScanner.hpp,v 1.1 2004/10/08 16:27:34 dfisher Exp $
00009  */
00010 
00011 #include <antlr/config.hpp>
00012 
00013 #include <map>
00014 
00015 #ifdef HAS_NOT_CCTYPE_H
00016 #include <ctype.h>
00017 #else
00018 #include <cctype>
00019 #endif
00020 
00021 #if ( _MSC_VER == 1200 )
00022 // VC6 seems to need this
00023 // note that this is not a standard C++ include file.
00024 # include <stdio.h>
00025 #endif
00026 
00027 #include <antlr/TokenStream.hpp>
00028 #include <antlr/RecognitionException.hpp>
00029 #include <antlr/SemanticException.hpp>
00030 #include <antlr/MismatchedCharException.hpp>
00031 #include <antlr/InputBuffer.hpp>
00032 #include <antlr/BitSet.hpp>
00033 #include <antlr/LexerSharedInputState.hpp>
00034 
00035 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
00036 namespace antlr {
00037 #endif
00038 
// Forward declaration: CharScannerLiteralsLess (declared before the full
// CharScanner class) stores a `const CharScanner*`.
00039 class ANTLR_API CharScanner;
00040 
// ANTLR_C_USING is defined outside this file (presumably in antlr/config.hpp
// -- confirm); it is used here so that tolower can be called unqualified by
// the code below (strcasecmp fallback and CharScanner::toLower).
00041 ANTLR_C_USING(tolower)
00042 
00043 #ifdef ANTLR_REALLY_NO_STRCASECMP
00044 // Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
00045 // on the mac has neither...
// Fallback case-insensitive comparison of two C strings, compiled only when
// ANTLR_REALLY_NO_STRCASECMP is defined (platform offers neither strcasecmp
// nor stricmp).
//
// The strings are walked in lock step; each character is lower-cased with
// tolower() and the results are compared.
//
// Parameters:
//   s1, s2 - non-null, NUL-terminated strings.
// Returns:
//   -1 if s1 orders before s2, 1 if s1 orders after s2, 0 if the strings are
//   equal when case is ignored.
//
// Every character is converted to unsigned char before it is handed to
// tolower(): passing a negative plain char (any byte >= 0x80 where char is
// signed) is undefined behaviour. Keeping the lower-cased values as int also
// makes bytes >= 0x80 order after ASCII, the same way strcmp-style functions
// compare bytes.
inline int strcasecmp(const char *s1, const char *s2)
{
        while (true)
        {
                const int c1 = tolower(static_cast<unsigned char>(*s1++));
                const int c2 = tolower(static_cast<unsigned char>(*s2++));
                if (c1 < c2) return -1;
                if (c1 > c2) return 1;
                if (c1 == 0) return 0;  // both strings ended at the same position
        }
}
00057 #else
00058 #ifdef NO_STRCASECMP
00059 ANTLR_C_USING(stricmp)
00060 #else
00061 ANTLR_C_USING(strcasecmp)
00062 #endif
00063 #endif
00064 
// Ordering functor over std::string, used as the comparator type of the
// `literals` map in CharScanner. The comparison itself (operator(), defined
// inline after the CharScanner class) asks the associated scanner's
// getCaseSensitiveLiterals() whether to compare case-sensitively or not.
00067 class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
00068 private:
              // Scanner whose case-sensitivity setting drives the comparison; not owned.
00069         const CharScanner* scanner;
00070 public:
00071 #ifdef NO_TEMPLATE_PARTS
              // NOTE(review): this constructor leaves `scanner` uninitialized, while
              // operator() dereferences it -- an instance built this way must never
              // be used to compare.
00072         CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
00073 #endif
              // Bind the functor to theScanner (stored as a raw pointer, so the
              // scanner must outlive every use of this functor).
00074         CharScannerLiteralsLess(const CharScanner* theScanner)
00075         : scanner(theScanner)
00076         {
00077         }
              // Returns true when x orders before y; defined inline later in this file.
00078         bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
00079 // defaults are good enough..
00080         //      CharScannerLiteralsLess(const CharScannerLiteralsLess&);
00081         //      CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
00082 };
00083 
// Base class for character-level scanners. It is a TokenStream that reads
// characters from the InputBuffer held by a LexerSharedInputState, keeps
// line/column information in that shared state, accumulates consumed text,
// and offers match helpers that throw MismatchedCharException on failure.
//
// The class is abstract (getCaseSensitiveLiterals() is pure virtual) and
// non-copyable (copy constructor and assignment are private and undefined).
00086 class ANTLR_API CharScanner : public TokenStream {
00087 protected:
              // Pointer to a function that produces a fresh token object; called by makeToken().
00088         typedef RefToken (*factory_type)();
00089 public:
              // Constructors are defined out of line. Each takes the character source
              // (an InputBuffer or an existing shared input state) and a case-sensitivity
              // flag -- presumably the initial value of caseSensitive; confirm in the
              // implementation file.
00090         CharScanner(InputBuffer& cb, bool case_sensitive );
00091         CharScanner(InputBuffer* cb, bool case_sensitive );
00092         CharScanner(const LexerSharedInputState& state, bool case_sensitive );
00093 
00094         virtual ~CharScanner()
00095         {
00096         }
00097 
              // i-th lookahead character; defined inline after the class. The result is
              // passed through toLower() when the scanner is not case sensitive.
00098         virtual int LA(unsigned int i);
00099 
              // Append c to `text`, but only while saveConsumedInput is set.
00100         virtual void append(char c)
00101         {
00102                 if (saveConsumedInput) {
00103                         int l = text.length();
                              // Grow capacity in 256-character steps: whenever the current
                              // length is a multiple of 256, reserve room for 256 more.
00104                         if ((l%256) == 0)
00105                                 text.reserve(l+256);
                              // Insert the single character at the end of the string.
00106                         text.replace(l,0,&c,1);
00107                 }
00108         }
00109 
              // Append s to `text`, but only while saveConsumedInput is set.
00110         virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
00111         {
00112                 if (saveConsumedInput)
00113                         text+=s;
00114         }
00115 
              // Forward to commit() of the underlying input buffer.
00116         virtual void commit()
00117         {
00118                 inputState->getInput().commit();
00119         }
00120 
              // Consume the current character; defined out of line.
00121         virtual void consume();
00122 
              /** Consume characters until LA(1) equals c or EOF_CHAR.
               *  The terminating character itself is not consumed.
               */
00124         virtual void consumeUntil(int c)
00125         {
00126                 for(;;)
00127                 {
00128                         int la_1 = LA(1);
00129                         if( la_1 == EOF_CHAR || la_1 == c )
00130                                 break;
00131                         consume();
00132                 }
00133         }
00134 
              /** Consume characters until LA(1) is a member of set or EOF_CHAR.
               *  The terminating character itself is not consumed.
               */
00136         virtual void consumeUntil(const BitSet& set)
00137         {
00138                 for(;;)
00139                 {
00140                         int la_1 = LA(1);
00141                         if( la_1 == EOF_CHAR || set.member(la_1) )
00142                                 break;
00143                         consume();
00144                 }
00145         }
00146 
              /** Forward to mark() of the underlying input buffer and return its result. */
00148         virtual unsigned int mark()
00149         {
00150                 return inputState->getInput().mark();
00151         }
              /** Forward to rewind(pos) of the underlying input buffer. */
00153         virtual void rewind(unsigned int pos)
00154         {
00155                 inputState->getInput().rewind(pos);
00156         }
00157 
              /** Consume one character if LA(1) equals c.
               *  @throws MismatchedCharException if LA(1) differs from c (nothing is consumed).
               */
00159         virtual void match(int c)
00160         {
00161                 int la_1 = LA(1);
00162                 if ( la_1 != c )
00163                         throw MismatchedCharException(la_1, c, false, this);
00164                 consume();
00165         }
00166 
              /** Consume one character if LA(1) is a member of b.
               *  @throws MismatchedCharException if LA(1) is not in b (nothing is consumed).
               */
00170         virtual void match(const BitSet& b)
00171         {
00172                 int la_1 = LA(1);
00173 
00174                 if ( !b.member(la_1) )
00175                         throw MismatchedCharException( la_1, b, false, this );
00176                 consume();
00177         }
00178 
              /** Match the NUL-terminated string s character by character.
               *  Characters that matched before a mismatch remain consumed.
               *  @throws MismatchedCharException at the first character that differs.
               */
00182         virtual void match( const char* s )
00183         {
00184                 while( *s != '\0' )
00185                 {
00186                         // the & 0xFF is here to prevent sign extension later on
00187                         int la_1 = LA(1), c = (*s++ & 0xFF);
00188 
00189                         if ( la_1 != c )
00190                                 throw MismatchedCharException(la_1, c, false, this);
00191 
00192                         consume();
00193                 }
00194         }
              /** Match the string s character by character (all s.length() characters,
               *  so embedded NULs are compared too).
               *  Characters that matched before a mismatch remain consumed.
               *  @throws MismatchedCharException at the first character that differs.
               */
00198         virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
00199         {
00200                 size_t len = s.length();
00201 
00202                 for (size_t i = 0; i < len; i++)
00203                 {
00204                         // the & 0xFF is here to prevent sign extension later on
00205                         int la_1 = LA(1), c = (s[i] & 0xFF);
00206 
00207                         if ( la_1 != c )
00208                                 throw MismatchedCharException(la_1, c, false, this);
00209 
00210                         consume();
00211                 }
00212         }
              /** Consume one character provided LA(1) is anything but c.
               *  @throws MismatchedCharException if LA(1) equals c (nothing is consumed).
               */
00216         virtual void matchNot(int c)
00217         {
00218                 int la_1 = LA(1);
00219 
00220                 if ( la_1 == c )
00221                         throw MismatchedCharException(la_1, c, true, this);
00222 
00223                 consume();
00224         }
              /** Consume one character if LA(1) lies in the inclusive range [c1, c2].
               *  @throws MismatchedCharException if LA(1) is outside the range.
               */
00228         virtual void matchRange(int c1, int c2)
00229         {
00230                 int la_1 = LA(1);
00231 
00232                 if ( la_1 < c1 || la_1 > c2 )
00233                         throw MismatchedCharException(la_1, c1, c2, false, this);
00234 
00235                 consume();
00236         }
00237 
              // Whether LA() returns characters unchanged (true) or lower-cased (false).
00238         virtual bool getCaseSensitive() const
00239         {
00240                 return caseSensitive;
00241         }
00242 
00243         virtual void setCaseSensitive(bool t)
00244         {
00245                 caseSensitive = t;
00246         }
00247 
              // Pure virtual: tells CharScannerLiteralsLess whether keys of the
              // literals map are compared case-sensitively.
00248         virtual bool getCaseSensitiveLiterals() const=0;
00249 
              /** Current line number, read from the shared input state. */
00251         virtual int getLine() const
00252         {
00253                 return inputState->line;
00254         }
00255 
              /** Set the current line number in the shared input state. */
00257         virtual void setLine(int l)
00258         {
00259                 inputState->line = l;
00260         }
00261 
              /** Current column, read from the shared input state. */
00263         virtual int getColumn() const
00264         {
00265                 return inputState->column;
00266         }
              /** Set the current column in the shared input state. */
00268         virtual void setColumn(int c)
00269         {
00270                 inputState->column = c;
00271         }
00272 
              /** File name stored in the shared input state. */
00274         virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
00275         {
00276                 return inputState->filename;
00277         }
              /** Store f as the file name in the shared input state. */
00279         virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
00280         {
00281                 inputState->filename = f;
00282         }
00283 
              // Accessors for the commitToPath flag; nothing in this header reads the
              // flag beyond these two methods.
00284         virtual bool getCommitToPath() const
00285         {
00286                 return commitToPath;
00287         }
00288 
00289         virtual void setCommitToPath(bool commit)
00290         {
00291                 commitToPath = commit;
00292         }
00293 
              /** Text accumulated so far (see append() and resetText()). */
00295         virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
00296         {
00297                 return text;
00298         }
00299 
              // Replace the accumulated text with s.
00300         virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
00301         {
00302                 text = s;
00303         }
00304 
              // Clear the accumulated text and record the current line/column as the
              // start position of the next token (used by makeToken()).
00305         virtual void resetText()
00306         {
00307                 text = "";
00308                 inputState->tokenStartColumn = inputState->column;
00309                 inputState->tokenStartLine = inputState->line;
00310         }
00311 
              // Return the _returnToken member.
00312         virtual RefToken getTokenObject() const
00313         {
00314                 return _returnToken;
00315         }
00316 
              /** Advance to the next line: increment the line counter and reset the
               *  column to 1 (columns are 1-based).
               */
00320         virtual void newline()
00321         {
00322                 ++inputState->line;
00323                 inputState->column = 1;
00324         }
00325 
              /** Move the column to the next tab stop. With 1-based columns and tab
               *  stops every `tabsize` columns, the next stop after column c is
               *  ((c-1)/tabsize + 1) * tabsize + 1.
               *  NOTE(review): divides by tabsize; setTabsize() does not reject 0.
               */
00330         virtual void tab()
00331         {
00332                 int c = getColumn();
00333                 int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1;      // calculate tab stop
00334                 setColumn( nc );
00335         }
              /** Set the tab size used by tab(); returns the previous tab size. */
00337         int setTabsize( int size )
00338         {
00339                 int oldsize = tabsize;
00340                 tabsize = size;
00341                 return oldsize;
00342         }
              /** Current tab size used by tab(). */
00344         int getTabSize() const
00345         {
00346                 return tabsize;
00347         }
00348 
              // panic / reportError / reportWarning are defined out of line; their
              // behaviour is not visible in this header.
00352         virtual void panic();
00356         virtual void panic(const ANTLR_USE_NAMESPACE(std)string& s);
00357 
00359         virtual void reportError(const RecognitionException& e);
00360 
00362         virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
00363 
00365         virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
00366 
              // Input buffer held by the shared input state.
00367         virtual InputBuffer& getInputBuffer()
00368         {
00369                 return inputState->getInput();
00370         }
00371 
              // Return the shared input state (returned by value).
00372         virtual LexerSharedInputState getInputState()
00373         {
00374                 return inputState;
00375         }
00376 
              /** Replace the scanner's shared input state with state. */
00379         virtual void setInputState(LexerSharedInputState state)
00380         {
00381                 inputState = state;
00382         }
00383 
              /** Set the factory function that makeToken() uses to create tokens. */
00385         virtual void setTokenObjectFactory(factory_type factory)
00386         {
00387                 tokenFactory = factory;
00388         }
00389 
              /** Look up the accumulated text in the literals map.
               *  @return the token type mapped to the text if it is present,
               *          otherwise ttype unchanged.
               */
00393         virtual int testLiteralsTable(int ttype) const
00394         {
00395                 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
00396                 if (i != literals.end())
00397                         ttype = (*i).second;
00398                 return ttype;
00399         }
00400 
              /** Look up txt (rather than the accumulated text) in the literals map.
               *  @return the token type mapped to txt if it is present,
               *          otherwise ttype unchanged.
               */
00406         virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
00407         {
00408                 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
00409                 if (i != literals.end())
00410                         ttype = (*i).second;
00411                 return ttype;
00412         }
00413 
              /** Lower-case c with tolower(), passing EOF_CHAR through untouched. */
00415         virtual int toLower(int c) const
00416         {
00417                 // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
00418                 // also VC++ 6.0 does this (see fix 422, which is reverted by this fix);
00419                 // this one is more structural. Maybe make this configurable.
00420                 return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
00421         }
00422 
              /** Hook with an empty default implementation that subclasses may override.
               *  Presumably invoked when the scanner reaches end of input -- the call
               *  site is not in this header; confirm against the generated lexer.
               */
00438         virtual void uponEOF()
00439         {
00440         }
00441 
              /** Tracing helpers, defined out of line; traceIn/traceOut are called by Tracer. */
00443         virtual void traceIndent();
00444         virtual void traceIn(const char* rname);
00445         virtual void traceOut(const char* rname);
00446 
              // End-of-input marker returned by LA(); equal to the C library's EOF.
              // Declared as an enum instead when NO_STATIC_CONSTS is defined.
00447 #ifndef NO_STATIC_CONSTS
00448         static const int EOF_CHAR = EOF;
00449 #else
00450         enum {
00451                 EOF_CHAR = EOF
00452         };
00453 #endif
00454 protected:
00455         ANTLR_USE_NAMESPACE(std)string text; // text accumulated by append(); cleared by resetText()
00456 
              // When true, append() records characters in `text`; when false it is a no-op.
00457         bool saveConsumedInput;
00458         factory_type tokenFactory;                              // creates token objects for makeToken()
00459         bool caseSensitive;                                             // when false, LA() lower-cases its result
00460         ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
00461 
00462         RefToken _returnToken;          // token handed out by getTokenObject()
00463 
              /** Shared input state: input buffer, line/column, token start position, file name. */
00465         LexerSharedInputState inputState;
00466 
              /** Flag exposed through getCommitToPath()/setCommitToPath(); its consumer
               *  is not visible in this header.
               */
00471         bool commitToPath;
00472 
00473         int tabsize;    // distance between tab stops, used by tab()
00474 
              /** Create a token of type t through tokenFactory and stamp it with the
               *  token start column/line recorded in the shared input state.
               *  NOTE(review): tokenFactory is called unchecked; it must have been set.
               */
00476         virtual RefToken makeToken(int t)
00477         {
00478                 RefToken tok = tokenFactory();
00479                 tok->setType(t);
00480                 tok->setColumn(inputState->tokenStartColumn);
00481                 tok->setLine(inputState->tokenStartLine);
00482                 return tok;
00483         }
00484 
              /** Scope-bound tracing helper: the constructor calls traceIn(text) on the
               *  scanner and the destructor calls traceOut(text). Non-copyable.
               */
00487         class Tracer {
00488         private:
                      // Scanner to notify (named `parser` here, but it is a CharScanner) and
                      // the name passed to traceIn/traceOut; neither is owned.
00489                 CharScanner* parser;
00490                 const char* text;
00491 
00492                 Tracer(const Tracer& other);                                    // undefined
00493                 Tracer& operator=(const Tracer& other);         // undefined
00494         public:
00495                 Tracer( CharScanner* p,const char* t )
00496                 : parser(p), text(t)
00497                 {
00498                         parser->traceIn(text);
00499                 }
00500                 ~Tracer()
00501                 {
00502                         parser->traceOut(text);
00503                 }
00504         };
00505 
              // Presumably the current nesting depth maintained by traceIn/traceOut --
              // their definitions are not in this header; confirm.
00506         int traceDepth;
00507 private:
              // Copying is disabled: declared private and never defined.
00508         CharScanner( const CharScanner& other );                                        // undefined
00509         CharScanner& operator=( const CharScanner& other );     // undefined
00510 
              // Private constant with value 0; not referenced anywhere in this header.
00511 #ifndef NO_STATIC_CONSTS
00512         static const int NO_CHAR = 0;
00513 #else
00514         enum {
00515                 NO_CHAR = 0
00516         };
00517 #endif
00518 };
00519 
// Return the i-th lookahead character, obtained from LA(i) of the input buffer
// held by the shared input state. When the scanner is case sensitive the
// character is returned unchanged; otherwise it is passed through toLower(),
// which leaves EOF_CHAR untouched.
00520 inline int CharScanner::LA(unsigned int i)
00521 {
00522         int c = inputState->getInput().LA(i);
00523 
00524         if ( caseSensitive )
00525                 return c;
00526         else
00527                 return toLower(c);      // VC 6 tolower bug caught in toLower.
00528 }
00529 
// Return true when x orders before y.
// If the associated scanner reports case-sensitive literals, the ordering is
// that of std::less<std::string>. Otherwise the C strings are compared
// case-insensitively with stricmp (when NO_STRCASECMP is defined) or
// strcasecmp, and x orders before y when that comparison yields a negative
// result. Because c_str() is used, the case-insensitive comparison stops at
// the first embedded NUL.
// `scanner` is dereferenced without a check, so it must point to a live scanner.
00530 inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
00531 {
00532         if (scanner->getCaseSensitiveLiterals())
00533                 return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
00534         else
00535         {
00536 #ifdef NO_STRCASECMP
00537                 return (stricmp(x.c_str(),y.c_str())<0);
00538 #else
00539                 return (strcasecmp(x.c_str(),y.c_str())<0);
00540 #endif
00541         }
00542 }
00543 
00544 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
00545 }
00546 #endif
00547 
00548 #endif //INC_CharScanner_hpp__

Generated on Wed Nov 3 12:58:52 2004 for Lemur Toolkit by doxygen 1.2.18