00001 #ifndef INC_CharScanner_hpp__
00002 #define INC_CharScanner_hpp__
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <antlr/config.hpp>
00012
00013 #include <map>
00014
00015 #ifdef HAS_NOT_CCTYPE_H
00016 #include <ctype.h>
00017 #else
00018 #include <cctype>
00019 #endif
00020
00021 #if ( _MSC_VER == 1200 )
00022
00023
00024 # include <stdio.h>
00025 #endif
00026
00027 #include <antlr/TokenStream.hpp>
00028 #include <antlr/RecognitionException.hpp>
00029 #include <antlr/SemanticException.hpp>
00030 #include <antlr/MismatchedCharException.hpp>
00031 #include <antlr/InputBuffer.hpp>
00032 #include <antlr/BitSet.hpp>
00033 #include <antlr/LexerSharedInputState.hpp>
00034
00035 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
00036 namespace antlr {
00037 #endif
00038
00039 class ANTLR_API CharScanner;
00040
00041 ANTLR_C_USING(tolower)
00042
00043 #ifdef ANTLR_REALLY_NO_STRCASECMP
00044
00045
/** Fallback case-insensitive comparison of two NUL-terminated strings, used
 *  only when ANTLR_REALLY_NO_STRCASECMP is defined.
 *
 *  Both strings are walked in lock-step; each character is lower-cased with
 *  tolower() before being compared.
 *
 *  @param s1 first NUL-terminated string
 *  @param s2 second NUL-terminated string
 *  @return -1 if s1 orders before s2, 1 if it orders after, 0 if the two
 *          strings are equal when case is ignored
 */
inline int strcasecmp(const char *s1, const char *s2)
{
	while (true)
	{
		// Convert through unsigned char: handing tolower() a negative value
		// other than EOF is undefined, and comparing as int keeps bytes
		// >= 0x80 ordered after ASCII instead of before it.
		const int c1 = tolower(static_cast<unsigned char>(*s1++));
		const int c2 = tolower(static_cast<unsigned char>(*s2++));
		if (c1 < c2) return -1;
		if (c1 > c2) return 1;
		// Equal characters: if that character is the terminator, both
		// strings ended together.
		if (c1 == 0) return 0;
	}
}
00057 #else
00058 #ifdef NO_STRCASECMP
00059 ANTLR_C_USING(stricmp)
00060 #else
00061 ANTLR_C_USING(strcasecmp)
00062 #endif
00063 #endif
00064
00067 class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
00068 private:
00069 const CharScanner* scanner;
00070 public:
00071 #ifdef NO_TEMPLATE_PARTS
00072 CharScannerLiteralsLess() {}
00073 #endif
00074 CharScannerLiteralsLess(const CharScanner* theScanner)
00075 : scanner(theScanner)
00076 {
00077 }
00078 bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
00079
00080
00081
00082 };
00083
00086 class ANTLR_API CharScanner : public TokenStream {
00087 protected:
00088 typedef RefToken (*factory_type)();
00089 public:
00090 CharScanner(InputBuffer& cb, bool case_sensitive );
00091 CharScanner(InputBuffer* cb, bool case_sensitive );
00092 CharScanner(const LexerSharedInputState& state, bool case_sensitive );
00093
00094 virtual ~CharScanner()
00095 {
00096 }
00097
00098 virtual int LA(unsigned int i);
00099
00100 virtual void append(char c)
00101 {
00102 if (saveConsumedInput) {
00103 int l = text.length();
00104 if ((l%256) == 0)
00105 text.reserve(l+256);
00106 text.replace(l,0,&c,1);
00107 }
00108 }
00109
00110 virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
00111 {
00112 if (saveConsumedInput)
00113 text+=s;
00114 }
00115
00116 virtual void commit()
00117 {
00118 inputState->getInput().commit();
00119 }
00120
00121 virtual void consume();
00122
00124 virtual void consumeUntil(int c)
00125 {
00126 for(;;)
00127 {
00128 int la_1 = LA(1);
00129 if( la_1 == EOF_CHAR || la_1 == c )
00130 break;
00131 consume();
00132 }
00133 }
00134
00136 virtual void consumeUntil(const BitSet& set)
00137 {
00138 for(;;)
00139 {
00140 int la_1 = LA(1);
00141 if( la_1 == EOF_CHAR || set.member(la_1) )
00142 break;
00143 consume();
00144 }
00145 }
00146
00148 virtual unsigned int mark()
00149 {
00150 return inputState->getInput().mark();
00151 }
00153 virtual void rewind(unsigned int pos)
00154 {
00155 inputState->getInput().rewind(pos);
00156 }
00157
00159 virtual void match(int c)
00160 {
00161 int la_1 = LA(1);
00162 if ( la_1 != c )
00163 throw MismatchedCharException(la_1, c, false, this);
00164 consume();
00165 }
00166
00170 virtual void match(const BitSet& b)
00171 {
00172 int la_1 = LA(1);
00173
00174 if ( !b.member(la_1) )
00175 throw MismatchedCharException( la_1, b, false, this );
00176 consume();
00177 }
00178
00182 virtual void match( const char* s )
00183 {
00184 while( *s != '\0' )
00185 {
00186
00187 int la_1 = LA(1), c = (*s++ & 0xFF);
00188
00189 if ( la_1 != c )
00190 throw MismatchedCharException(la_1, c, false, this);
00191
00192 consume();
00193 }
00194 }
00198 virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
00199 {
00200 size_t len = s.length();
00201
00202 for (size_t i = 0; i < len; i++)
00203 {
00204
00205 int la_1 = LA(1), c = (s[i] & 0xFF);
00206
00207 if ( la_1 != c )
00208 throw MismatchedCharException(la_1, c, false, this);
00209
00210 consume();
00211 }
00212 }
00216 virtual void matchNot(int c)
00217 {
00218 int la_1 = LA(1);
00219
00220 if ( la_1 == c )
00221 throw MismatchedCharException(la_1, c, true, this);
00222
00223 consume();
00224 }
00228 virtual void matchRange(int c1, int c2)
00229 {
00230 int la_1 = LA(1);
00231
00232 if ( la_1 < c1 || la_1 > c2 )
00233 throw MismatchedCharException(la_1, c1, c2, false, this);
00234
00235 consume();
00236 }
00237
00238 virtual bool getCaseSensitive() const
00239 {
00240 return caseSensitive;
00241 }
00242
00243 virtual void setCaseSensitive(bool t)
00244 {
00245 caseSensitive = t;
00246 }
00247
00248 virtual bool getCaseSensitiveLiterals() const=0;
00249
00251 virtual int getLine() const
00252 {
00253 return inputState->line;
00254 }
00255
00257 virtual void setLine(int l)
00258 {
00259 inputState->line = l;
00260 }
00261
00263 virtual int getColumn() const
00264 {
00265 return inputState->column;
00266 }
00268 virtual void setColumn(int c)
00269 {
00270 inputState->column = c;
00271 }
00272
00274 virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
00275 {
00276 return inputState->filename;
00277 }
00279 virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
00280 {
00281 inputState->filename = f;
00282 }
00283
00284 virtual bool getCommitToPath() const
00285 {
00286 return commitToPath;
00287 }
00288
00289 virtual void setCommitToPath(bool commit)
00290 {
00291 commitToPath = commit;
00292 }
00293
00295 virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
00296 {
00297 return text;
00298 }
00299
00300 virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
00301 {
00302 text = s;
00303 }
00304
00305 virtual void resetText()
00306 {
00307 text = "";
00308 inputState->tokenStartColumn = inputState->column;
00309 inputState->tokenStartLine = inputState->line;
00310 }
00311
00312 virtual RefToken getTokenObject() const
00313 {
00314 return _returnToken;
00315 }
00316
00320 virtual void newline()
00321 {
00322 ++inputState->line;
00323 inputState->column = 1;
00324 }
00325
00330 virtual void tab()
00331 {
00332 int c = getColumn();
00333 int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1;
00334 setColumn( nc );
00335 }
00337 int setTabsize( int size )
00338 {
00339 int oldsize = tabsize;
00340 tabsize = size;
00341 return oldsize;
00342 }
00344 int getTabSize() const
00345 {
00346 return tabsize;
00347 }
00348
00352 virtual void panic();
00356 virtual void panic(const ANTLR_USE_NAMESPACE(std)string& s);
00357
00359 virtual void reportError(const RecognitionException& e);
00360
00362 virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
00363
00365 virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
00366
00367 virtual InputBuffer& getInputBuffer()
00368 {
00369 return inputState->getInput();
00370 }
00371
00372 virtual LexerSharedInputState getInputState()
00373 {
00374 return inputState;
00375 }
00376
00379 virtual void setInputState(LexerSharedInputState state)
00380 {
00381 inputState = state;
00382 }
00383
00385 virtual void setTokenObjectFactory(factory_type factory)
00386 {
00387 tokenFactory = factory;
00388 }
00389
00393 virtual int testLiteralsTable(int ttype) const
00394 {
00395 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
00396 if (i != literals.end())
00397 ttype = (*i).second;
00398 return ttype;
00399 }
00400
00406 virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
00407 {
00408 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
00409 if (i != literals.end())
00410 ttype = (*i).second;
00411 return ttype;
00412 }
00413
00415 virtual int toLower(int c) const
00416 {
00417
00418
00419
00420 return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
00421 }
00422
00438 virtual void uponEOF()
00439 {
00440 }
00441
00443 virtual void traceIndent();
00444 virtual void traceIn(const char* rname);
00445 virtual void traceOut(const char* rname);
00446
00447 #ifndef NO_STATIC_CONSTS
00448 static const int EOF_CHAR = EOF;
00449 #else
00450 enum {
00451 EOF_CHAR = EOF
00452 };
00453 #endif
00454 protected:
00455 ANTLR_USE_NAMESPACE(std)string text;
00456
00457 bool saveConsumedInput;
00458 factory_type tokenFactory;
00459 bool caseSensitive;
00460 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals;
00461
00462 RefToken _returnToken;
00463
00465 LexerSharedInputState inputState;
00466
00471 bool commitToPath;
00472
00473 int tabsize;
00474
00476 virtual RefToken makeToken(int t)
00477 {
00478 RefToken tok = tokenFactory();
00479 tok->setType(t);
00480 tok->setColumn(inputState->tokenStartColumn);
00481 tok->setLine(inputState->tokenStartLine);
00482 return tok;
00483 }
00484
00487 class Tracer {
00488 private:
00489 CharScanner* parser;
00490 const char* text;
00491
00492 Tracer(const Tracer& other);
00493 Tracer& operator=(const Tracer& other);
00494 public:
00495 Tracer( CharScanner* p,const char* t )
00496 : parser(p), text(t)
00497 {
00498 parser->traceIn(text);
00499 }
00500 ~Tracer()
00501 {
00502 parser->traceOut(text);
00503 }
00504 };
00505
00506 int traceDepth;
00507 private:
00508 CharScanner( const CharScanner& other );
00509 CharScanner& operator=( const CharScanner& other );
00510
00511 #ifndef NO_STATIC_CONSTS
00512 static const int NO_CHAR = 0;
00513 #else
00514 enum {
00515 NO_CHAR = 0
00516 };
00517 #endif
00518 };
00519
00520 inline int CharScanner::LA(unsigned int i)
00521 {
00522 int c = inputState->getInput().LA(i);
00523
00524 if ( caseSensitive )
00525 return c;
00526 else
00527 return toLower(c);
00528 }
00529
00530 inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
00531 {
00532 if (scanner->getCaseSensitiveLiterals())
00533 return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
00534 else
00535 {
00536 #ifdef NO_STRCASECMP
00537 return (stricmp(x.c_str(),y.c_str())<0);
00538 #else
00539 return (strcasecmp(x.c_str(),y.c_str())<0);
00540 #endif
00541 }
00542 }
00543
00544 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
00545 }
00546 #endif
00547
00548 #endif //INC_CharScanner_hpp__