src/lisp/lexer.cpp

   1 //  $Id$
   2 //
   3 //  SuperTux
   4 //  Copyright (C) 2006 Matthias Braun <matze@braunis.de>
   5 //
   6 //  This program is free software; you can redistribute it and/or
   7 //  modify it under the terms of the GNU General Public License
   8 //  as published by the Free Software Foundation; either version 2
   9 //  of the License, or (at your option) any later version.
  10 //
  11 //  This program is distributed in the hope that it will be useful,
  12 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 //  GNU General Public License for more details.
  15 //
  16 //  You should have received a copy of the GNU General Public License
  17 //  along with this program; if not, write to the Free Software
  18 //  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19 #include <config.h>
  20
  21 #include <sstream>
  22 #include <cstring>
  23 #include <stdexcept>
  24 #include <iostream>
  25
  26 #include "lexer.hpp"
  27
  28 namespace lisp
  29 {
  30
  31 Lexer::Lexer(std::istream& newstream)
  32     : stream(newstream), eof(false), linenumber(0)
  33 {
  34   // trigger a refill of the buffer
  35   bufpos = NULL;
  36   bufend = NULL;
  37   nextChar();
  38 }
  39
  40 Lexer::~Lexer()
  41 {
  42 }
  43
  44 void
  45 Lexer::nextChar()
  46 {
  47   if(bufpos >= bufend) {
  48     if(eof) {
  49       c = EOF;
  50       return;
  51     }
  52     stream.read(buffer, BUFFER_SIZE);
  53     size_t bytes_read = stream.gcount();
  54
  55     bufpos = buffer;
  56     bufend = buffer + bytes_read;
  57
  58     // the following is a hack that appends an additional ' ' at the end of
  59     // the file to avoid problems when parsing symbols/elements and a sudden
  60     // EOF. This is faster than relying on unget and IMO also nicer.
  61     if(bytes_read == 0 || stream.eof()) {
  62       eof = true;
  63       *bufend = ' ';
  64       ++bufend;
  65     }
  66   }
  67   c = *bufpos++;
  68   if(c == '\n')
  69     ++linenumber;
  70 }
  71
  72 void
  73 Lexer::addChar()
  74 {
  75   if(token_length < MAX_TOKEN_LENGTH)
  76     token_string[token_length++] = c;
  77   nextChar();
  78 }
  79
  80 Lexer::TokenType
  81 Lexer::getNextToken()
  82 {
  83   static const char* delims = "\"();";
  84
  85   while(isspace(c)) {
  86     nextChar();
  87   }
  88
  89   token_length = 0;
  90
  91   switch(c) {
  92     case ';': // comment
  93       while(c != '\n') {
  94         nextChar();
  95       }
  96       return getNextToken(); // and again
  97     case '(':
  98       nextChar();
  99       return TOKEN_OPEN_PAREN;
 100     case ')':
 101       nextChar();
 102       return TOKEN_CLOSE_PAREN;
 103     case '"': {  // string
 104       int startline = linenumber;
 105       while(1) {
 106         nextChar();
 107         switch(c) {
 108         case '"':
 109           nextChar();
 110           goto string_finished;
 111         case '\r':
 112           continue;
 113         case '\n':
 114           break;
 115         case '\\':
 116           nextChar();
 117           switch(c) {
 118           case 'n':
 119             c = '\n';
 120             break;
 121           case 't':
 122             c = '\t';
 123             break;
 124           }
 125           break;
 126         case EOF: {
 127           std::stringstream msg;
 128           msg << "Parse error in line " << startline << ": "
 129               << "EOF while parsing string.";
 130           throw std::runtime_error(msg.str());
 131         }
 132         default:
 133           break;
 134         }
 135         if(token_length < MAX_TOKEN_LENGTH)
 136           token_string[token_length++] = c;
 137       }
 138 string_finished:
 139       token_string[token_length] = 0;
 140       return TOKEN_STRING;
 141     }
 142     case '#': // constant
 143       nextChar();
 144
 145       while(isalnum(c) || c == '_') {
 146         addChar();
 147       }
 148       token_string[token_length] = 0;
 149
 150       if(strcmp(token_string, "t") == 0)
 151         return TOKEN_TRUE;
 152       if(strcmp(token_string, "f") == 0)
 153         return TOKEN_FALSE;
 154
 155       // we only handle #t and #f constants at the moment...
 156       {
 157         std::stringstream msg;
 158         msg << "Parse Error in line " << linenumber << ": "
 159             << "Unknown constant '" << token_string << "'.";
 160         throw std::runtime_error(msg.str());
 161       }
 162
 163     case EOF:
 164       return TOKEN_EOF;
 165
 166     default:
 167       if(isdigit(c) || c == '-') {
 168         bool have_nondigits = false;
 169         bool have_digits = false;
 170         int have_floating_point = 0;
 171
 172         do {
 173           if(isdigit(c))
 174             have_digits = true;
 175           else if(c == '.')
 176             ++have_floating_point;
 177           else if(isalnum(c) || c == '_')
 178             have_nondigits = true;
 179
 180           addChar();
 181         } while(!isspace(c) && !strchr(delims, c));
 182
 183         token_string[token_length] = 0;
 184
 185         // no nextChar
 186
 187         if(have_nondigits || !have_digits || have_floating_point > 1)
 188           return TOKEN_SYMBOL;
 189         else if(have_floating_point == 1)
 190           return TOKEN_REAL;
 191         else
 192           return TOKEN_INTEGER;
 193       } else {
 194         do {
 195           addChar();
 196         } while(!isspace(c) && !strchr(delims, c));
 197         token_string[token_length] = 0;
 198
 199         // no nextChar
 200
 201         return TOKEN_SYMBOL;
 202       }
 203   }
 204 }
 205
 206 } // end of namespace lisp