50bc246f076e8345bdc2d1367fc333c78860893e
[supertux.git] / src / squirrel / squirrel / sqlexer.cpp
1 /*
2         see copyright notice in squirrel.h
3 */
4 #include "sqpcheader.h"
5 #include <ctype.h>
6 #include <stdlib.h>
7 #include "sqtable.h"
8 #include "sqstring.h"
9 #include "sqcompiler.h"
10 #include "sqlexer.h"
11
// Helper macros for the lexer. They expand inside SQLexer member functions
// and refer directly to its members.

// The character currently under the read cursor.
#define CUR_CHAR (_currdata)
// Remembers the previous token, records `t` as the current one and returns it
// from the enclosing function.
#define RETURN_TOKEN(t) { _prevtoken = _curtoken; _curtoken = t; return t; }
// True when the cursor sits on the end-of-buffer marker (or below it).
#define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)
// Reads the next character and advances the column counter.
#define NEXT() { Next(); ++_currentcolumn; }
// Empties the scratch buffer used to accumulate the text of a token.
#define INIT_TEMP_STRING() { _longstr.resize(0); }
// Appends one character to the scratch buffer.
#define APPEND_CHAR(c) { _longstr.push_back(c); }
// Zero-terminates the scratch buffer.
#define TERMINATE_BUFFER() { _longstr.push_back(_SC('\0')); }
// Registers keyword `key` with token id `id`; expects a variable named `ss`
// (the shared state) to be in scope at the expansion site.
#define ADD_KEYWORD(key,id) _keywords->NewSlot(SQString::Create(ss, _SC(#key)), SQInteger(id))
20
21 SQLexer::SQLexer(){}
22 SQLexer::~SQLexer()
23 {
24         _keywords->Release();
25 }
26
27 void SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)
28 {
29         _errfunc = efunc;
30         _errtarget = ed;
31         _sharedstate = ss;
32         _keywords = SQTable::Create(ss, 26);
33         ADD_KEYWORD(while, TK_WHILE);
34         ADD_KEYWORD(do, TK_DO);
35         ADD_KEYWORD(if, TK_IF);
36         ADD_KEYWORD(else, TK_ELSE);
37         ADD_KEYWORD(break, TK_BREAK);
38         ADD_KEYWORD(continue, TK_CONTINUE);
39         ADD_KEYWORD(return, TK_RETURN);
40         ADD_KEYWORD(null, TK_NULL);
41         ADD_KEYWORD(function, TK_FUNCTION);
42         ADD_KEYWORD(local, TK_LOCAL);
43         ADD_KEYWORD(for, TK_FOR);
44         ADD_KEYWORD(foreach, TK_FOREACH);
45         ADD_KEYWORD(in, TK_IN);
46         ADD_KEYWORD(typeof, TK_TYPEOF);
47         ADD_KEYWORD(delegate, TK_DELEGATE);
48         ADD_KEYWORD(delete, TK_DELETE);
49         ADD_KEYWORD(try, TK_TRY);
50         ADD_KEYWORD(catch, TK_CATCH);
51         ADD_KEYWORD(throw, TK_THROW);
52         ADD_KEYWORD(clone, TK_CLONE);
53         ADD_KEYWORD(yield, TK_YIELD);
54         ADD_KEYWORD(resume, TK_RESUME);
55         ADD_KEYWORD(switch, TK_SWITCH);
56         ADD_KEYWORD(case, TK_CASE);
57         ADD_KEYWORD(default, TK_DEFAULT);
58         ADD_KEYWORD(this, TK_THIS);
59         ADD_KEYWORD(parent,TK_PARENT);
60         ADD_KEYWORD(class,TK_CLASS);
61         ADD_KEYWORD(extends,TK_EXTENDS);
62         ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
63         ADD_KEYWORD(instanceof,TK_INSTANCEOF);
64         ADD_KEYWORD(vargc,TK_VARGC);
65         ADD_KEYWORD(vargv,TK_VARGV);
66         ADD_KEYWORD(true,TK_TRUE);
67         ADD_KEYWORD(false,TK_FALSE);
68         ADD_KEYWORD(static,TK_STATIC);
69         ADD_KEYWORD(enum,TK_ENUM);
70         ADD_KEYWORD(const,TK_CONST);
71
72         _readf = rg;
73         _up = up;
74         _lasttokenline = _currentline = 1;
75         _currentcolumn = 0;
76         _prevtoken = -1;
77         Next();
78 }
79
80 void SQLexer::Error(const SQChar *err)
81 {
82         _errfunc(_errtarget,err);
83 }
84
85 void SQLexer::Next()
86 {
87         SQInteger t = _readf(_up);
88         if(t > MAX_CHAR) Error(_SC("Invalid character"));
89         if(t != 0) {
90                 _currdata = (LexChar)t;
91                 return;
92         }
93         _currdata = SQUIRREL_EOB;
94 }
95
96 const SQChar *SQLexer::Tok2Str(SQInteger tok)
97 {
98         SQObjectPtr itr, key, val;
99         SQInteger nitr;
100         while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
101                 itr = (SQInteger)nitr;
102                 if(((SQInteger)_integer(val)) == tok)
103                         return _stringval(key);
104         }
105         return NULL;
106 }
107
108 void SQLexer::LexBlockComment()
109 {
110         bool done = false;
111         while(!done) {
112                 switch(CUR_CHAR) {
113                         case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;
114                         case _SC('\n'): _currentline++; NEXT(); continue;
115                         case SQUIRREL_EOB: Error(_SC("missing \"*/\" in comment"));
116                         default: NEXT();
117                 }
118         }
119 }
120
121 SQInteger SQLexer::Lex()
122 {
123         _lasttokenline = _currentline;
124         while(CUR_CHAR != SQUIRREL_EOB) {
125                 switch(CUR_CHAR){
126                 case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
127                 case _SC('\n'):
128                         _currentline++;
129                         _prevtoken=_curtoken;
130                         _curtoken=_SC('\n');
131                         NEXT();
132                         _currentcolumn=1;
133                         continue;
134                 case _SC('/'):
135                         NEXT();
136                         switch(CUR_CHAR){
137                         case _SC('*'):
138                                 NEXT();
139                                 LexBlockComment();
140                                 continue;       
141                         case _SC('/'):
142                                 do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
143                                 continue;
144                         case _SC('='):
145                                 NEXT();
146                                 RETURN_TOKEN(TK_DIVEQ);
147                                 continue;
148                         case _SC('>'):
149                                 NEXT();
150                                 RETURN_TOKEN(TK_ATTR_CLOSE);
151                                 continue;
152                         default:
153                                 RETURN_TOKEN('/');
154                         }
155                 case _SC('='):
156                         NEXT();
157                         if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
158                         else { NEXT(); RETURN_TOKEN(TK_EQ); }
159                 case _SC('<'):
160                         NEXT();
161                         if ( CUR_CHAR == _SC('=') ) { NEXT(); RETURN_TOKEN(TK_LE) }
162                         else if ( CUR_CHAR == _SC('-') ) { NEXT(); RETURN_TOKEN(TK_NEWSLOT); }
163                         else if ( CUR_CHAR == _SC('<') ) { NEXT(); RETURN_TOKEN(TK_SHIFTL); }
164                         else if ( CUR_CHAR == _SC('/') ) { NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); }
165                         //else if ( CUR_CHAR == _SC('[') ) { NEXT(); ReadMultilineString(); RETURN_TOKEN(TK_STRING_LITERAL); }
166                         else { RETURN_TOKEN('<') }
167                 case _SC('>'):
168                         NEXT();
169                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
170                         else if(CUR_CHAR == _SC('>')){ 
171                                 NEXT(); 
172                                 if(CUR_CHAR == _SC('>')){
173                                         NEXT();
174                                         RETURN_TOKEN(TK_USHIFTR);
175                                 }
176                                 RETURN_TOKEN(TK_SHIFTR);
177                         }
178                         else { RETURN_TOKEN('>') }
179                 case _SC('!'):
180                         NEXT();
181                         if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
182                         else { NEXT(); RETURN_TOKEN(TK_NE); }
183                 case _SC('@'): {
184                         SQInteger stype;
185                         NEXT(); 
186                         if(CUR_CHAR != _SC('"'))
187                                 Error(_SC("string expected"));
188                         if((stype=ReadString('"',true))!=-1) {
189                                 RETURN_TOKEN(stype);
190                         }
191                         Error(_SC("error parsing the string"));
192                                            }
193                 case _SC('"'):
194                 case _SC('\''): {
195                         SQInteger stype;
196                         if((stype=ReadString(CUR_CHAR,false))!=-1){
197                                 RETURN_TOKEN(stype);
198                         }
199                         Error(_SC("error parsing the string"));
200                         }
201                 case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
202                 case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'):
203                         {SQInteger ret = CUR_CHAR;
204                         NEXT(); RETURN_TOKEN(ret); }
205                 case _SC('.'):
206                         NEXT();
207                         if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
208                         NEXT();
209                         if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); }
210                         NEXT();
211                         RETURN_TOKEN(TK_VARPARAMS);
212                 case _SC('&'):
213                         NEXT();
214                         if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
215                         else { NEXT(); RETURN_TOKEN(TK_AND); }
216                 case _SC('|'):
217                         NEXT();
218                         if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
219                         else { NEXT(); RETURN_TOKEN(TK_OR); }
220                 case _SC(':'):
221                         NEXT();
222                         if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
223                         else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
224                 case _SC('*'):
225                         NEXT();
226                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
227                         else RETURN_TOKEN('*');
228                 case _SC('%'):
229                         NEXT();
230                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
231                         else RETURN_TOKEN('%');
232                 case _SC('-'):
233                         NEXT();
234                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
235                         else if  (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
236                         else RETURN_TOKEN('-');
237                 case _SC('+'):
238                         NEXT();
239                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
240                         else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
241                         else RETURN_TOKEN('+');
242                 case SQUIRREL_EOB:
243                         return 0;
244                 default:{
245                                 if (scisdigit(CUR_CHAR)) {
246                                         SQInteger ret = ReadNumber();
247                                         RETURN_TOKEN(ret);
248                                 }
249                                 else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
250                                         SQInteger t = ReadID();
251                                         RETURN_TOKEN(t);
252                                 }
253                                 else {
254                                         SQInteger c = CUR_CHAR;
255                                         if (sciscntrl((int)c)) Error(_SC("unexpected character(control)"));
256                                         NEXT();
257                                         RETURN_TOKEN(c);  
258                                 }
259                                 RETURN_TOKEN(0);
260                         }
261                 }
262         }
263         return 0;    
264 }
265         
266 SQInteger SQLexer::GetIDType(SQChar *s)
267 {
268         SQObjectPtr t;
269         if(_keywords->Get(SQString::Create(_sharedstate, s), t)) {
270                 return SQInteger(_integer(t));
271         }
272         return TK_IDENTIFIER;
273 }
274
275
276 SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)
277 {
278         INIT_TEMP_STRING();
279         NEXT();
280         if(IS_EOB()) return -1;
281         for(;;) {
282                 while(CUR_CHAR != ndelim) {
283                         switch(CUR_CHAR) {
284                         case SQUIRREL_EOB:
285                                 Error(_SC("unfinished string"));
286                                 return -1;
287                         case _SC('\n'): 
288                                 if(!verbatim) Error(_SC("newline in a constant")); 
289                                 APPEND_CHAR(CUR_CHAR); NEXT(); 
290                                 _currentline++;
291                                 break;
292                         case _SC('\\'):
293                                 if(verbatim) {
294                                         APPEND_CHAR('\\'); NEXT(); 
295                                 }
296                                 else {
297                                         NEXT();
298                                         switch(CUR_CHAR) {
299                                         case _SC('x'): NEXT(); {
300                                                 if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected")); 
301                                                 const SQInteger maxdigits = 4;
302                                                 SQChar temp[maxdigits+1];
303                                                 SQInteger n = 0;
304                                                 while(isxdigit(CUR_CHAR) && n < maxdigits) {
305                                                         temp[n] = CUR_CHAR;
306                                                         n++;
307                                                         NEXT();
308                                                 }
309                                                 temp[n] = 0;
310                                                 SQChar *sTemp;
311                                                 APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
312                                         }
313                                     break;
314                                         case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
315                                         case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
316                                         case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
317                                         case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
318                                         case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
319                                         case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
320                                         case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
321                                         case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;
322                                         case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
323                                         case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
324                                         case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
325                                         default:
326                                                 Error(_SC("unrecognised escaper char"));
327                                         break;
328                                         }
329                                 }
330                                 break;
331                         default:
332                                 APPEND_CHAR(CUR_CHAR);
333                                 NEXT();
334                         }
335                 }
336                 NEXT();
337                 if(verbatim && CUR_CHAR == '"') { //double quotation
338                         APPEND_CHAR(CUR_CHAR);
339                         NEXT();
340                 }
341                 else {
342                         break;
343                 }
344         }
345         TERMINATE_BUFFER();
346         SQInteger len = _longstr.size()-1;
347         if(ndelim == _SC('\'')) {
348                 if(len == 0) Error(_SC("empty constant"));
349                 if(len > 1) Error(_SC("constant too long"));
350                 _nvalue = _longstr[0];
351                 return TK_INTEGER;
352         }
353         _svalue = &_longstr[0];
354         return TK_STRING_LITERAL;
355 }
356
357 void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
358 {
359         *res = 0;
360         while(*s != 0)
361         {
362                 if(scisdigit(*s)) *res = (*res)*16+((*s++)-'0');
363                 else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-'A'+10);
364                 else { assert(0); }
365         }
366 }
367
368 void LexInteger(const SQChar *s,SQUnsignedInteger *res)
369 {
370         *res = 0;
371         while(*s != 0)
372         {
373                 *res = (*res)*10+((*s++)-'0');
374         }
375 }
376
377 SQInteger scisodigit(SQInteger c) { return c >= _SC('0') && c <= _SC('7'); }
378
379 void LexOctal(const SQChar *s,SQUnsignedInteger *res)
380 {
381         *res = 0;
382         while(*s != 0)
383         {
384                 if(scisodigit(*s)) *res = (*res)*8+((*s++)-'0');
385                 else { assert(0); }
386         }
387 }
388
389 SQInteger isexponent(SQInteger c) { return c == 'e' || c=='E'; }
390
391
392 #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
393 SQInteger SQLexer::ReadNumber()
394 {
395 #define TINT 1
396 #define TFLOAT 2
397 #define THEX 3
398 #define TSCIENTIFIC 4
399 #define TOCTAL 5
400         SQInteger type = TINT, firstchar = CUR_CHAR;
401         SQChar *sTemp;
402         INIT_TEMP_STRING();
403         NEXT();
404         if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
405                 if(scisodigit(CUR_CHAR)) {
406                         type = TOCTAL;
407                         while(scisodigit(CUR_CHAR)) {
408                                 APPEND_CHAR(CUR_CHAR);
409                                 NEXT();
410                         }
411                         if(scisdigit(CUR_CHAR)) Error(_SC("invalid octal number"));
412                 }
413                 else {
414                         NEXT();
415                         type = THEX;
416                         while(isxdigit(CUR_CHAR)) {
417                                 APPEND_CHAR(CUR_CHAR);
418                                 NEXT();
419                         }
420                         if(_longstr.size() > MAX_HEX_DIGITS) Error(_SC("too many digits for an Hex number"));
421                 }
422         }
423         else {
424                 APPEND_CHAR((int)firstchar);
425                 while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
426             if(CUR_CHAR == _SC('.')) type = TFLOAT;
427                         if(isexponent(CUR_CHAR)) {
428                                 if(type != TFLOAT) Error(_SC("invalid numeric format"));
429                                 type = TSCIENTIFIC;
430                                 APPEND_CHAR(CUR_CHAR);
431                                 NEXT();
432                                 if(CUR_CHAR == '+' || CUR_CHAR == '-'){
433                                         APPEND_CHAR(CUR_CHAR);
434                                         NEXT();
435                                 }
436                                 if(!scisdigit(CUR_CHAR)) Error(_SC("exponent expected"));
437                         }
438                         
439                         APPEND_CHAR(CUR_CHAR);
440                         NEXT();
441                 }
442         }
443         TERMINATE_BUFFER();
444         switch(type) {
445         case TSCIENTIFIC:
446         case TFLOAT:
447                 _fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);
448                 return TK_FLOAT;
449         case TINT:
450                 LexInteger(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
451                 return TK_INTEGER;
452         case THEX:
453                 LexHexadecimal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
454                 return TK_INTEGER;
455         case TOCTAL:
456                 LexOctal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
457                 return TK_INTEGER;
458         }
459         return 0;
460 }
461
462 SQInteger SQLexer::ReadID()
463 {
464         SQInteger res;
465         INIT_TEMP_STRING();
466         do {
467                 APPEND_CHAR(CUR_CHAR);
468                 NEXT();
469         } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
470         TERMINATE_BUFFER();
471         res = GetIDType(&_longstr[0]);
472         if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR) {
473                 _svalue = &_longstr[0];
474         }
475         return res;
476 }