src/lisp/lexer.cpp

   1 //  SuperTux
   2 //  Copyright (C) 2006 Matthias Braun <matze@braunis.de>
   3 //
   4 //  This program is free software: you can redistribute it and/or modify
   5 //  it under the terms of the GNU General Public License as published by
   6 //  the Free Software Foundation, either version 3 of the License, or
   7 //  (at your option) any later version.
   8 //
   9 //  This program is distributed in the hope that it will be useful,
  10 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 //  GNU General Public License for more details.
  13 //
  14 //  You should have received a copy of the GNU General Public License
  15 //  along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 #include "lisp/lexer.hpp"
  18
  19 #include <cstring>
  20 #include <sstream>
  21 #include <stdexcept>
  22 #include <stdio.h>
  23
  24 namespace lisp {
  25
  26 Lexer::Lexer(std::istream& newstream)
  27   : stream(newstream), eof(false), linenumber(0)
  28 {
  29   // trigger a refill of the buffer
  30   bufpos = NULL;
  31   bufend = NULL;
  32   nextChar();
  33 }
  34
  35 Lexer::~Lexer()
  36 {
  37 }
  38
  39 void
  40 Lexer::nextChar()
  41 {
  42   if(bufpos >= bufend) {
  43     if(eof) {
  44       c = EOF;
  45       return;
  46     }
  47     stream.read(buffer, BUFFER_SIZE);
  48     size_t bytes_read = stream.gcount();
  49
  50     bufpos = buffer;
  51     bufend = buffer + bytes_read;
  52
  53     // the following is a hack that appends an additional ' ' at the end of
  54     // the file to avoid problems when parsing symbols/elements and a sudden
  55     // EOF. This is faster than relying on unget and IMO also nicer.
  56     if(bytes_read == 0 || stream.eof()) {
  57       eof = true;
  58       *bufend = ' ';
  59       ++bufend;
  60     }
  61   }
  62   c = *bufpos++;
  63   if(c == '\n')
  64     ++linenumber;
  65 }
  66
  67 void
  68 Lexer::addChar()
  69 {
  70   if(token_length < MAX_TOKEN_LENGTH)
  71     token_string[token_length++] = c;
  72   nextChar();
  73 }
  74
  75 Lexer::TokenType
  76 Lexer::getNextToken()
  77 {
  78   static const char* delims = "\"();";
  79
  80   while(isspace(c)) {
  81     nextChar();
  82   }
  83
  84   token_length = 0;
  85
  86   switch(c) {
  87     case ';': // comment
  88       while(c != '\n') {
  89         nextChar();
  90       }
  91       return getNextToken(); // and again
  92     case '(':
  93       nextChar();
  94       return TOKEN_OPEN_PAREN;
  95     case ')':
  96       nextChar();
  97       return TOKEN_CLOSE_PAREN;
  98     case '"': {  // string
  99       int startline = linenumber;
 100       while(1) {
 101         nextChar();
 102         switch(c) {
 103           case '"':
 104             nextChar();
 105             goto string_finished;
 106           case '\r':
 107             continue;
 108           case '\n':
 109             break;
 110           case '\\':
 111             nextChar();
 112             switch(c) {
 113               case 'n':
 114                 c = '\n';
 115                 break;
 116               case 't':
 117                 c = '\t';
 118                 break;
 119             }
 120             break;
 121           case EOF: {
 122             std::stringstream msg;
 123             msg << "Parse error in line " << startline << ": "
 124                 << "EOF while parsing string.";
 125             throw std::runtime_error(msg.str());
 126           }
 127           default:
 128             break;
 129         }
 130         if(token_length < MAX_TOKEN_LENGTH)
 131           token_string[token_length++] = c;
 132       }
 133       string_finished:
 134       token_string[token_length] = 0;
 135       return TOKEN_STRING;
 136     }
 137     case '#': // constant
 138       nextChar();
 139
 140       while(isalnum(c) || c == '_') {
 141         addChar();
 142       }
 143       token_string[token_length] = 0;
 144
 145       if(strcmp(token_string, "t") == 0)
 146         return TOKEN_TRUE;
 147       if(strcmp(token_string, "f") == 0)
 148         return TOKEN_FALSE;
 149
 150       // we only handle #t and #f constants at the moment...
 151       {
 152         std::stringstream msg;
 153         msg << "Parse Error in line " << linenumber << ": "
 154             << "Unknown constant '" << token_string << "'.";
 155         throw std::runtime_error(msg.str());
 156       }
 157
 158     case EOF:
 159       return TOKEN_EOF;
 160
 161     default:
 162       if(isdigit(c) || c == '-') {
 163         bool have_nondigits = false;
 164         bool have_digits = false;
 165         int have_floating_point = 0;
 166
 167         do {
 168           if(isdigit(c))
 169             have_digits = true;
 170           else if(c == '.')
 171             ++have_floating_point;
 172           else if(isalnum(c) || c == '_')
 173             have_nondigits = true;
 174
 175           addChar();
 176         } while(!isspace(c) && !strchr(delims, c));
 177
 178         token_string[token_length] = 0;
 179
 180         // no nextChar
 181
 182         if(have_nondigits || !have_digits || have_floating_point > 1)
 183           return TOKEN_SYMBOL;
 184         else if(have_floating_point == 1)
 185           return TOKEN_REAL;
 186         else
 187           return TOKEN_INTEGER;
 188       } else {
 189         do {
 190           addChar();
 191         } while(!isspace(c) && !strchr(delims, c));
 192         token_string[token_length] = 0;
 193
 194         // no nextChar
 195
 196         return TOKEN_SYMBOL;
 197       }
 198   }
 199 }
 200
 201 } // end of namespace lisp
 202
 203 /* EOF */