From ea0629af91963ee2e4ab6b41124b5a7075ddd309 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Sun, 18 May 2008 12:18:12 +0000 Subject: [PATCH] don't abuse exceptions for indicating EOF SVN-Revision: 5490 --- src/lisp/lexer.cpp | 257 ++++++++++++++++++++++++++--------------------------- src/lisp/lexer.hpp | 3 +- 2 files changed, 126 insertions(+), 134 deletions(-) diff --git a/src/lisp/lexer.cpp b/src/lisp/lexer.cpp index 05b52bc09..e1a4220f5 100644 --- a/src/lisp/lexer.cpp +++ b/src/lisp/lexer.cpp @@ -28,20 +28,13 @@ namespace lisp { -class EOFException -{ -}; - Lexer::Lexer(std::istream& newstream) : stream(newstream), eof(false), linenumber(0) { - try { - // trigger a refill of the buffer - c = 0; - bufend = 0; - nextChar(); - } catch(EOFException& ) { - } + // trigger a refill of the buffer + bufpos = NULL; + bufend = NULL; + nextChar(); } Lexer::~Lexer() @@ -51,14 +44,15 @@ Lexer::~Lexer() void Lexer::nextChar() { - ++c; - if(c >= bufend) { - if(eof) - throw EOFException(); + if(bufpos >= bufend) { + if(eof) { + c = EOF; + return; + } stream.read(buffer, BUFFER_SIZE); size_t bytes_read = stream.gcount(); - c = buffer; + bufpos = buffer; bufend = buffer + bytes_read; // the following is a hack that appends an additional ' ' at the end of @@ -70,6 +64,7 @@ Lexer::nextChar() ++bufend; } } + c = *bufpos++; } Lexer::TokenType @@ -77,142 +72,138 @@ Lexer::getNextToken() { static const char* delims = "\"();"; - try { - while(isspace(*c)) { - if(*c == '\n') - ++linenumber; - nextChar(); - }; + while(isspace(c)) { + if(c == '\n') + ++linenumber; + nextChar(); + }; - token_length = 0; + token_length = 0; - switch(*c) { - case ';': // comment - while(true) { - nextChar(); - if(*c == '\n') { - ++linenumber; - break; - } - } - return getNextToken(); // and again - case '(': + switch(c) { + case ';': // comment + while(true) { nextChar(); - return TOKEN_OPEN_PAREN; - case ')': + if(c == '\n') { + ++linenumber; + break; + } + } + return getNextToken(); // and again + 
case '(': + nextChar(); + return TOKEN_OPEN_PAREN; + case ')': + nextChar(); + return TOKEN_CLOSE_PAREN; + case '"': { // string + int startline = linenumber; + while(1) { nextChar(); - return TOKEN_CLOSE_PAREN; - case '"': { // string - int startline = linenumber; - try { - while(1) { - nextChar(); - if(*c == '"') - break; - else if (*c == '\r') // XXX this breaks with pure \r EOL - continue; - else if(*c == '\n') - linenumber++; - else if(*c == '\\') { - nextChar(); - switch(*c) { - case 'n': - *c = '\n'; - break; - case 't': - *c = '\t'; - break; - } - } - if(token_length < MAX_TOKEN_LENGTH) - token_string[token_length++] = *c; + switch(c) { + case '"': + nextChar(); + goto string_finished; + case '\r': + continue; + case '\n': + linenumber++; + break; + case '\\': + nextChar(); + switch(c) { + case 'n': + c = '\n'; + break; + case 't': + c = '\t'; + break; } - token_string[token_length] = 0; - } catch(EOFException& ) { + break; + case EOF: { std::stringstream msg; msg << "Parse error in line " << startline << ": " << "EOF while parsing string."; throw std::runtime_error(msg.str()); } + default: + break; + } + if(token_length < MAX_TOKEN_LENGTH) + token_string[token_length++] = c; + } +string_finished: + token_string[token_length] = 0; + return TOKEN_STRING; + } + case '#': // constant + nextChar(); + + while(isalnum(c) || c == '_') { + if(token_length < MAX_TOKEN_LENGTH) + token_string[token_length++] = c; nextChar(); - return TOKEN_STRING; } - case '#': // constant - try { - nextChar(); + token_string[token_length] = 0; - while(isalnum(*c) || *c == '_') { - if(token_length < MAX_TOKEN_LENGTH) - token_string[token_length++] = *c; - nextChar(); - } - token_string[token_length] = 0; - } catch(EOFException& ) { - std::stringstream msg; - msg << "Parse Error in line " << linenumber << ": " - << "EOF while parsing constant."; - throw std::runtime_error(msg.str()); - } + if(strcmp(token_string, "t") == 0) + return TOKEN_TRUE; + if(strcmp(token_string, "f") == 0) + 
return TOKEN_FALSE; - if(strcmp(token_string, "t") == 0) - return TOKEN_TRUE; - if(strcmp(token_string, "f") == 0) - return TOKEN_FALSE; + // we only handle #t and #f constants at the moment... + { + std::stringstream msg; + msg << "Parse Error in line " << linenumber << ": " + << "Unknown constant '" << token_string << "'."; + throw std::runtime_error(msg.str()); + } - // we only handle #t and #f constants at the moment... + case EOF: + return TOKEN_EOF; - { - std::stringstream msg; - msg << "Parse Error in line " << linenumber << ": " - << "Unknown constant '" << token_string << "'."; - throw std::runtime_error(msg.str()); - } + default: + if(isdigit(c) || c == '-') { + bool have_nondigits = false; + bool have_digits = false; + int have_floating_point = 0; - default: - if(isdigit(*c) || *c == '-') { - bool have_nondigits = false; - bool have_digits = false; - int have_floating_point = 0; - - do { - if(isdigit(*c)) - have_digits = true; - else if(*c == '.') - ++have_floating_point; - else if(isalnum(*c) || *c == '_') - have_nondigits = true; - - if(token_length < MAX_TOKEN_LENGTH) - token_string[token_length++] = *c; - - nextChar(); - } while(!isspace(*c) && !strchr(delims, *c)); - - token_string[token_length] = 0; - - // no nextChar - - if(have_nondigits || !have_digits || have_floating_point > 1) - return TOKEN_SYMBOL; - else if(have_floating_point == 1) - return TOKEN_REAL; - else - return TOKEN_INTEGER; - } else { - do { - if(token_length < MAX_TOKEN_LENGTH) - token_string[token_length++] = *c; - nextChar(); - } while(!isspace(*c) && !strchr(delims, *c)); - token_string[token_length] = 0; - - // no nextChar + do { + if(isdigit(c)) + have_digits = true; + else if(c == '.') + ++have_floating_point; + else if(isalnum(c) || c == '_') + have_nondigits = true; + if(token_length < MAX_TOKEN_LENGTH) + token_string[token_length++] = c; + + nextChar(); + } while(!isspace(c) && !strchr(delims, c)); + + token_string[token_length] = 0; + + // no nextChar + + 
if(have_nondigits || !have_digits || have_floating_point > 1) return TOKEN_SYMBOL; - } - } - } catch(EOFException& ) { - return TOKEN_EOF; + else if(have_floating_point == 1) + return TOKEN_REAL; + else + return TOKEN_INTEGER; + } else { + do { + if(token_length < MAX_TOKEN_LENGTH) + token_string[token_length++] = c; + nextChar(); + } while(!isspace(c) && !strchr(delims, c)); + token_string[token_length] = 0; + + // no nextChar + + return TOKEN_SYMBOL; + } } } diff --git a/src/lisp/lexer.hpp b/src/lisp/lexer.hpp index 1cd062b0d..5c5f2d58c 100644 --- a/src/lisp/lexer.hpp +++ b/src/lisp/lexer.hpp @@ -59,7 +59,8 @@ private: int linenumber; char buffer[BUFFER_SIZE+1]; char* bufend; - char* c; + char* bufpos; + int c; char token_string[MAX_TOKEN_LENGTH + 1]; int token_length; }; -- 2.11.0