2 // Copyright (C) 2006 Matthias Braun <matze@braunis.de>
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
17 #include "lisp/lexer.hpp"
26 Lexer::Lexer(std::istream& newstream) :
36 // trigger a refill of the buffer
49 if(bufpos >= bufend || (bufpos == NULL && bufend == NULL) /* Initial refill trigger */) {
54 stream.read(buffer, BUFFER_SIZE);
55 size_t bytes_read = stream.gcount();
58 bufend = buffer + bytes_read;
60 // The following is a hack that appends an additional ' ' at the end of
61 // the file to avoid problems when a symbol/element is being parsed and
62 // EOF is hit unexpectedly. This is faster than relying on unget and arguably also nicer.
63 if(bytes_read == 0 || stream.eof()) {
81 if(token_length < MAX_TOKEN_LENGTH)
82 token_string[token_length++] = c;
89 static const char* delims = "\"();";
102 return getNextToken(); // and again
105 return TOKEN_OPEN_PAREN;
108 return TOKEN_CLOSE_PAREN;
109 case '"': { // string
110 int startline = linenumber;
116 goto string_finished;
133 std::stringstream msg;
134 msg << "Parse error in line " << startline << ": "
135 << "EOF while parsing string.";
136 throw std::runtime_error(msg.str());
141 if(token_length < MAX_TOKEN_LENGTH)
142 token_string[token_length++] = c;
145 token_string[token_length] = 0;
148 case '#': // constant
151 while(isalnum(c) || c == '_') {
154 token_string[token_length] = 0;
156 if(strcmp(token_string, "t") == 0)
158 if(strcmp(token_string, "f") == 0)
161 // we only handle #t and #f constants at the moment...
163 std::stringstream msg;
164 msg << "Parse Error in line " << linenumber << ": "
165 << "Unknown constant '" << token_string << "'.";
166 throw std::runtime_error(msg.str());
173 if(isdigit(c) || c == '-') {
174 bool have_nondigits = false;
175 bool have_digits = false;
176 int have_floating_point = 0;
182 ++have_floating_point;
183 else if(isalnum(c) || c == '_')
184 have_nondigits = true;
187 } while(!isspace(c) && !strchr(delims, c));
189 token_string[token_length] = 0;
193 if(have_nondigits || !have_digits || have_floating_point > 1)
195 else if(have_floating_point == 1)
198 return TOKEN_INTEGER;
202 } while(!isspace(c) && !strchr(delims, c));
203 token_string[token_length] = 0;
212 } // end of namespace lisp