fix cr/lfs and remove trailing whitespaces...
[supertux.git] / src / squirrel / squirrel / sqlexer.cpp
1 /*
2         see copyright notice in squirrel.h
3 */
4 #include "sqpcheader.h"
5 #include <ctype.h>
6 #include <stdlib.h>
7 #include "sqtable.h"
8 #include "sqstring.h"
9 #include "sqcompiler.h"
10 #include "sqlexer.h"
11
// ---------------------------------------------------------------------------
// Lexer helper macros. They are only meaningful inside SQLexer member
// functions because they touch the lexer's members directly.
//
// NOTE: the statement-like macros expand to a bare { ... } block and several
// call sites rely on that exact shape (e.g. "RETURN_TOKEN('=') }" without a
// semicolon), so their expansions must not be changed.
// ---------------------------------------------------------------------------

// Character currently under the cursor.
#define CUR_CHAR (_currdata)

// Remember the previous token, store t as the current one and return it.
// The argument is evaluated twice, so it must be free of side effects.
#define RETURN_TOKEN(t) { _prevtoken = _curtoken; _curtoken = t; return t;}

// True when the cursor sits on (or below) the end-of-buffer marker.
#define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)

// Fetch the next character and advance the column counter.
#define NEXT() {Next();_currentcolumn++;}

// Scratch buffer (_longstr) handling: reset, append one character, and
// append the terminating NUL.
#define INIT_TEMP_STRING() { _longstr.resize(0);}
#define APPEND_CHAR(c) { _longstr.push_back(c);}
#define TERMINATE_BUFFER() {_longstr.push_back(_SC('\0'));}

// Register the keyword spelled `key` with token id `id` in the keyword table.
// Expects a variable named `ss` (the shared state) to be in scope.
#define ADD_KEYWORD(key,id) _keywords->NewSlot( SQString::Create(ss, _SC(#key)) ,SQInteger(id))
20
21 SQLexer::SQLexer(){}
22 SQLexer::~SQLexer()
23 {
24         _keywords->Release();
25 }
26
27 void SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)
28 {
29         _errfunc = efunc;
30         _errtarget = ed;
31         _sharedstate = ss;
32         _keywords = SQTable::Create(ss, 26);
33         ADD_KEYWORD(while, TK_WHILE);
34         ADD_KEYWORD(do, TK_DO);
35         ADD_KEYWORD(if, TK_IF);
36         ADD_KEYWORD(else, TK_ELSE);
37         ADD_KEYWORD(break, TK_BREAK);
38         ADD_KEYWORD(continue, TK_CONTINUE);
39         ADD_KEYWORD(return, TK_RETURN);
40         ADD_KEYWORD(null, TK_NULL);
41         ADD_KEYWORD(function, TK_FUNCTION);
42         ADD_KEYWORD(local, TK_LOCAL);
43         ADD_KEYWORD(for, TK_FOR);
44         ADD_KEYWORD(foreach, TK_FOREACH);
45         ADD_KEYWORD(in, TK_IN);
46         ADD_KEYWORD(typeof, TK_TYPEOF);
47         ADD_KEYWORD(delegate, TK_DELEGATE);
48         ADD_KEYWORD(delete, TK_DELETE);
49         ADD_KEYWORD(try, TK_TRY);
50         ADD_KEYWORD(catch, TK_CATCH);
51         ADD_KEYWORD(throw, TK_THROW);
52         ADD_KEYWORD(clone, TK_CLONE);
53         ADD_KEYWORD(yield, TK_YIELD);
54         ADD_KEYWORD(resume, TK_RESUME);
55         ADD_KEYWORD(switch, TK_SWITCH);
56         ADD_KEYWORD(case, TK_CASE);
57         ADD_KEYWORD(default, TK_DEFAULT);
58         ADD_KEYWORD(this, TK_THIS);
59         ADD_KEYWORD(parent,TK_PARENT);
60         ADD_KEYWORD(class,TK_CLASS);
61         ADD_KEYWORD(extends,TK_EXTENDS);
62         ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
63         ADD_KEYWORD(instanceof,TK_INSTANCEOF);
64         ADD_KEYWORD(vargc,TK_VARGC);
65         ADD_KEYWORD(vargv,TK_VARGV);
66         ADD_KEYWORD(true,TK_TRUE);
67         ADD_KEYWORD(false,TK_FALSE);
68         ADD_KEYWORD(static,TK_STATIC);
69
70         _readf = rg;
71         _up = up;
72         _lasttokenline = _currentline = 1;
73         _currentcolumn = 0;
74         _prevtoken = -1;
75         Next();
76 }
77
78 void SQLexer::Error(const SQChar *err)
79 {
80         _errfunc(_errtarget,err);
81 }
82
83 void SQLexer::Next()
84 {
85         SQInteger t = _readf(_up);
86         if(t > MAX_CHAR) Error(_SC("Invalid character"));
87         if(t != 0) {
88                 _currdata = (LexChar)t;
89                 return;
90         }
91         _currdata = SQUIRREL_EOB;
92 }
93
94 const SQChar *SQLexer::Tok2Str(SQInteger tok)
95 {
96         SQObjectPtr itr, key, val;
97         SQInteger nitr;
98         while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
99                 itr = (SQInteger)nitr;
100                 if(((SQInteger)_integer(val)) == tok)
101                         return _stringval(key);
102         }
103         return NULL;
104 }
105
106 void SQLexer::LexBlockComment()
107 {
108         bool done = false;
109         while(!done) {
110                 switch(CUR_CHAR) {
111                         case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;
112                         case _SC('\n'): _currentline++; NEXT(); continue;
113                         case SQUIRREL_EOB: Error(_SC("missing \"*/\" in comment"));
114                         default: NEXT();
115                 }
116         }
117 }
118
119 SQInteger SQLexer::Lex()
120 {
121         _lasttokenline = _currentline;
122         while(CUR_CHAR != SQUIRREL_EOB) {
123                 switch(CUR_CHAR){
124                 case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
125                 case _SC('\n'):
126                         _currentline++;
127                         _prevtoken=_curtoken;
128                         _curtoken=_SC('\n');
129                         NEXT();
130                         _currentcolumn=1;
131                         continue;
132                 case _SC('/'):
133                         NEXT();
134                         switch(CUR_CHAR){
135                         case _SC('*'):
136                                 NEXT();
137                                 LexBlockComment();
138                                 continue;
139                         case _SC('/'):
140                                 do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
141                                 continue;
142                         case _SC('='):
143                                 NEXT();
144                                 RETURN_TOKEN(TK_DIVEQ);
145                                 continue;
146                         case _SC('>'):
147                                 NEXT();
148                                 RETURN_TOKEN(TK_ATTR_CLOSE);
149                                 continue;
150                         default:
151                                 RETURN_TOKEN('/');
152                         }
153                 case _SC('='):
154                         NEXT();
155                         if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
156                         else { NEXT(); RETURN_TOKEN(TK_EQ); }
157                 case _SC('<'):
158                         NEXT();
159                         if ( CUR_CHAR == _SC('=') ) { NEXT(); RETURN_TOKEN(TK_LE) }
160                         else if ( CUR_CHAR == _SC('-') ) { NEXT(); RETURN_TOKEN(TK_NEWSLOT); }
161                         else if ( CUR_CHAR == _SC('<') ) { NEXT(); RETURN_TOKEN(TK_SHIFTL); }
162                         else if ( CUR_CHAR == _SC('/') ) { NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); }
163                         //else if ( CUR_CHAR == _SC('[') ) { NEXT(); ReadMultilineString(); RETURN_TOKEN(TK_STRING_LITERAL); }
164                         else { RETURN_TOKEN('<') }
165                 case _SC('>'):
166                         NEXT();
167                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
168                         else if(CUR_CHAR == _SC('>')){
169                                 NEXT();
170                                 if(CUR_CHAR == _SC('>')){
171                                         NEXT();
172                                         RETURN_TOKEN(TK_USHIFTR);
173                                 }
174                                 RETURN_TOKEN(TK_SHIFTR);
175                         }
176                         else { RETURN_TOKEN('>') }
177                 case _SC('!'):
178                         NEXT();
179                         if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
180                         else { NEXT(); RETURN_TOKEN(TK_NE); }
181                 case _SC('@'): {
182                         SQInteger stype;
183                         NEXT();
184                         if(CUR_CHAR != _SC('"'))
185                                 Error(_SC("string expected"));
186                         if((stype=ReadString('"',true))!=-1) {
187                                 RETURN_TOKEN(stype);
188                         }
189                         Error(_SC("error parsing the string"));
190                                            }
191                 case _SC('"'):
192                 case _SC('\''): {
193                         SQInteger stype;
194                         if((stype=ReadString(CUR_CHAR,false))!=-1){
195                                 RETURN_TOKEN(stype);
196                         }
197                         Error(_SC("error parsing the string"));
198                         }
199                 case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
200                 case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'):
201                         {SQInteger ret = CUR_CHAR;
202                         NEXT(); RETURN_TOKEN(ret); }
203                 case _SC('.'):
204                         NEXT();
205                         if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
206                         NEXT();
207                         if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); }
208                         NEXT();
209                         RETURN_TOKEN(TK_VARPARAMS);
210                 case _SC('&'):
211                         NEXT();
212                         if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
213                         else { NEXT(); RETURN_TOKEN(TK_AND); }
214                 case _SC('|'):
215                         NEXT();
216                         if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
217                         else { NEXT(); RETURN_TOKEN(TK_OR); }
218                 case _SC(':'):
219                         NEXT();
220                         if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
221                         else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
222                 case _SC('*'):
223                         NEXT();
224                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
225                         else RETURN_TOKEN('*');
226                 case _SC('%'):
227                         NEXT();
228                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
229                         else RETURN_TOKEN('%');
230                 case _SC('-'):
231                         NEXT();
232                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
233                         else if  (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
234                         else RETURN_TOKEN('-');
235                 case _SC('+'):
236                         NEXT();
237                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
238                         else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
239                         else RETURN_TOKEN('+');
240                 case SQUIRREL_EOB:
241                         return 0;
242                 default:{
243                                 if (scisdigit(CUR_CHAR)) {
244                                         SQInteger ret = ReadNumber();
245                                         RETURN_TOKEN(ret);
246                                 }
247                                 else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
248                                         SQInteger t = ReadID();
249                                         RETURN_TOKEN(t);
250                                 }
251                                 else {
252                                         SQInteger c = CUR_CHAR;
253                                         if (sciscntrl((int)c)) Error(_SC("unexpected character(control)"));
254                                         NEXT();
255                                         RETURN_TOKEN(c);
256                                 }
257                                 RETURN_TOKEN(0);
258                         }
259                 }
260         }
261         return 0;
262 }
263
264 SQInteger SQLexer::GetIDType(SQChar *s)
265 {
266         SQObjectPtr t;
267         if(_keywords->Get(SQString::Create(_sharedstate, s), t)) {
268                 return SQInteger(_integer(t));
269         }
270         return TK_IDENTIFIER;
271 }
272
273
274 SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
275 {
276         INIT_TEMP_STRING();
277         NEXT();
278         if(IS_EOB()) return -1;
279         for(;;) {
280                 while(CUR_CHAR != ndelim) {
281                         switch(CUR_CHAR) {
282                         case SQUIRREL_EOB:
283                                 Error(_SC("unfinished string"));
284                                 return -1;
285                         case _SC('\n'):
286                                 if(!verbatim) Error(_SC("newline in a constant"));
287                                 APPEND_CHAR(CUR_CHAR); NEXT();
288                                 _currentline++;
289                                 break;
290                         case _SC('\\'):
291                                 if(verbatim) {
292                                         APPEND_CHAR('\\'); NEXT();
293                                 }
294                                 else {
295                                         NEXT();
296                                         switch(CUR_CHAR) {
297                                         case _SC('x'): NEXT(); {
298                                                 if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected"));
299                                                 const SQInteger maxdigits = 4;
300                                                 SQChar temp[maxdigits+1];
301                                                 SQInteger n = 0;
302                                                 while(isxdigit(CUR_CHAR) && n < maxdigits) {
303                                                         temp[n] = CUR_CHAR;
304                                                         n++;
305                                                         NEXT();
306                                                 }
307                                                 temp[n] = 0;
308                                                 SQChar *sTemp;
309                                                 APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
310                                         }
311                                     break;
312                                         case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
313                                         case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
314                                         case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
315                                         case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
316                                         case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
317                                         case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
318                                         case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
319                                         case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;
320                                         case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
321                                         case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
322                                         case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
323                                         default:
324                                                 Error(_SC("unrecognised escaper char"));
325                                         break;
326                                         }
327                                 }
328                                 break;
329                         default:
330                                 APPEND_CHAR(CUR_CHAR);
331                                 NEXT();
332                         }
333                 }
334                 NEXT();
335                 if(verbatim && CUR_CHAR == '"') { //double quotation
336                         APPEND_CHAR(CUR_CHAR);
337                         NEXT();
338                 }
339                 else {
340                         break;
341                 }
342         }
343         TERMINATE_BUFFER();
344         SQInteger len = _longstr.size()-1;
345         if(ndelim == _SC('\'')) {
346                 if(len == 0) Error(_SC("empty constant"));
347                 if(len > 1) Error(_SC("constant too long"));
348                 _nvalue = _longstr[0];
349                 return TK_INTEGER;
350         }
351         _svalue = &_longstr[0];
352         return TK_STRING_LITERAL;
353 }
354
355 void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
356 {
357         *res = 0;
358         while(*s != 0)
359         {
360                 if(scisdigit(*s)) *res = (*res)*16+((*s++)-'0');
361                 else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-'A'+10);
362                 else { assert(0); }
363         }
364 }
365
366 void LexInteger(const SQChar *s,SQUnsignedInteger *res)
367 {
368         *res = 0;
369         while(*s != 0)
370         {
371                 *res = (*res)*10+((*s++)-'0');
372         }
373 }
374
375 SQInteger isexponent(SQInteger c) { return c == 'e' || c=='E'; }
376 #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
377 SQInteger SQLexer::ReadNumber()
378 {
379 #define TINT 1
380 #define TFLOAT 2
381 #define THEX 3
382 #define TSCIENTIFIC 4
383         SQInteger type = TINT, firstchar = CUR_CHAR;
384         SQChar *sTemp;
385         INIT_TEMP_STRING();
386         NEXT();
387         if(firstchar == _SC('0') && toupper(CUR_CHAR) == _SC('X')) {
388                 NEXT();
389                 type = THEX;
390                 while(isxdigit(CUR_CHAR)) {
391                         APPEND_CHAR(CUR_CHAR);
392                         NEXT();
393                 }
394                 if(_longstr.size() > MAX_HEX_DIGITS) Error(_SC("too many digits for an Hex number"));
395         }
396         else {
397                 APPEND_CHAR((int)firstchar);
398                 while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
399             if(CUR_CHAR == _SC('.')) type = TFLOAT;
400                         if(isexponent(CUR_CHAR)) {
401                                 if(type != TFLOAT) Error(_SC("invalid numeric format"));
402                                 type = TSCIENTIFIC;
403                                 APPEND_CHAR(CUR_CHAR);
404                                 NEXT();
405                                 if(CUR_CHAR == '+' || CUR_CHAR == '-'){
406                                         APPEND_CHAR(CUR_CHAR);
407                                         NEXT();
408                                 }
409                                 if(!scisdigit(CUR_CHAR)) Error(_SC("exponent expected"));
410                         }
411
412                         APPEND_CHAR(CUR_CHAR);
413                         NEXT();
414                 }
415         }
416         TERMINATE_BUFFER();
417         switch(type) {
418         case TSCIENTIFIC:
419         case TFLOAT:
420                 _fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);
421                 return TK_FLOAT;
422         case TINT:
423                 LexInteger(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
424                 return TK_INTEGER;
425         case THEX:
426                 LexHexadecimal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
427                 return TK_INTEGER;
428         }
429         return 0;
430 }
431
432 SQInteger SQLexer::ReadID()
433 {
434         SQInteger res;
435         INIT_TEMP_STRING();
436         do {
437                 APPEND_CHAR(CUR_CHAR);
438                 NEXT();
439         } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
440         TERMINATE_BUFFER();
441         res = GetIDType(&_longstr[0]);
442         if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR) {
443                 _svalue = &_longstr[0];
444         }
445         return res;
446 }