updated squirrel version
[supertux.git] / src / squirrel / squirrel / sqlexer.cpp
1 /*
2         see copyright notice in squirrel.h
3 */
4 #include "sqpcheader.h"
5 #include <ctype.h>
6 #include <stdlib.h>
7 #include "sqtable.h"
8 #include "sqstring.h"
9 #include "sqcompiler.h"
10 #include "sqlexer.h"
11
/* The character the lexer is currently positioned on. */
#define CUR_CHAR (_currdata)

/* Shifts the current token into _prevtoken, records t as the current token
   and returns t from the enclosing function. Expands to a brace block. */
#define RETURN_TOKEN(t) { \
	_prevtoken = _curtoken; \
	_curtoken = t; \
	return t; \
}

/* True when the current character compares <= SQUIRREL_EOB. */
#define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)

/* Fetches the next character and advances the column counter. */
#define NEXT() { \
	Next(); \
	_currentcolumn++; \
}

/* Empties the scratch buffer used while assembling a token's text. */
#define INIT_TEMP_STRING() { \
	_longstr.resize(0); \
}

/* Appends one character to the scratch buffer. */
#define APPEND_CHAR(c) { \
	_longstr.push_back(c); \
}

/* Appends the terminating NUL to the scratch buffer. */
#define TERMINATE_BUFFER() { \
	_longstr.push_back(_SC('\0')); \
}

/* Registers the spelling of `key` (stringized) in the keyword table under
   token id `id`. Expects a variable named `ss` to be in scope. */
#define ADD_KEYWORD(key,id) \
	_keywords->NewSlot( SQString::Create(ss, _SC(#key)) ,SQInteger(id))
20
21 SQLexer::SQLexer(){}
22 SQLexer::~SQLexer()
23 {
24         _keywords->Release();
25 }
26
27 void SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)
28 {
29         _errfunc = efunc;
30         _errtarget = ed;
31         _sharedstate = ss;
32         _keywords = SQTable::Create(ss, 26);
33         ADD_KEYWORD(while, TK_WHILE);
34         ADD_KEYWORD(do, TK_DO);
35         ADD_KEYWORD(if, TK_IF);
36         ADD_KEYWORD(else, TK_ELSE);
37         ADD_KEYWORD(break, TK_BREAK);
38         ADD_KEYWORD(continue, TK_CONTINUE);
39         ADD_KEYWORD(return, TK_RETURN);
40         ADD_KEYWORD(null, TK_NULL);
41         ADD_KEYWORD(function, TK_FUNCTION);
42         ADD_KEYWORD(local, TK_LOCAL);
43         ADD_KEYWORD(for, TK_FOR);
44         ADD_KEYWORD(foreach, TK_FOREACH);
45         ADD_KEYWORD(in, TK_IN);
46         ADD_KEYWORD(typeof, TK_TYPEOF);
47         ADD_KEYWORD(delegate, TK_DELEGATE);
48         ADD_KEYWORD(delete, TK_DELETE);
49         ADD_KEYWORD(try, TK_TRY);
50         ADD_KEYWORD(catch, TK_CATCH);
51         ADD_KEYWORD(throw, TK_THROW);
52         ADD_KEYWORD(clone, TK_CLONE);
53         ADD_KEYWORD(yield, TK_YIELD);
54         ADD_KEYWORD(resume, TK_RESUME);
55         ADD_KEYWORD(switch, TK_SWITCH);
56         ADD_KEYWORD(case, TK_CASE);
57         ADD_KEYWORD(default, TK_DEFAULT);
58         ADD_KEYWORD(this, TK_THIS);
59         ADD_KEYWORD(parent,TK_PARENT);
60         ADD_KEYWORD(class,TK_CLASS);
61         ADD_KEYWORD(extends,TK_EXTENDS);
62         ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
63         ADD_KEYWORD(instanceof,TK_INSTANCEOF);
64         ADD_KEYWORD(vargc,TK_VARGC);
65         ADD_KEYWORD(vargv,TK_VARGV);
66         ADD_KEYWORD(true,TK_TRUE);
67         ADD_KEYWORD(false,TK_FALSE);
68
69         _readf = rg;
70         _up = up;
71         _lasttokenline = _currentline = 1;
72         _currentcolumn = 0;
73         _prevtoken = -1;
74         Next();
75 }
76
77 void SQLexer::Error(const SQChar *err)
78 {
79         _errfunc(_errtarget,err);
80 }
81
82 void SQLexer::Next()
83 {
84         SQInteger t = _readf(_up);
85         if(t > MAX_CHAR) Error(_SC("Invalid character"));
86         if(t != 0) {
87                 _currdata = t;
88                 return;
89         }
90         _currdata = SQUIRREL_EOB;
91 }
92
93 const SQChar *SQLexer::Tok2Str(int tok)
94 {
95         SQObjectPtr itr, key, val;
96         int nitr;
97         while((nitr = _keywords->Next(itr, key, val)) != -1) {
98                 itr = (SQInteger)nitr;
99                 if(((int)_integer(val)) == tok)
100                         return _stringval(key);
101         }
102         return NULL;
103 }
104
105 void SQLexer::LexBlockComment()
106 {
107         bool done = false;
108         while(!done) {
109                 switch(CUR_CHAR) {
110                         case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;
111                         //case _SC('/'): { NEXT(); if(CUR_CHAR == _SC('*')) { nest++; NEXT(); }}; continue;
112                         case _SC('\n'): _currentline++; NEXT(); continue;
113                         case SQUIRREL_EOB: Error(_SC("missing \"*/\" in comment"));
114                         default: NEXT();
115                 }
116         }
117 }
118
119 int SQLexer::Lex()
120 {
121         _lasttokenline = _currentline;
122         while(CUR_CHAR != SQUIRREL_EOB) {
123                 switch(CUR_CHAR){
124                 case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
125                 case _SC('\n'):
126                         _currentline++;
127                         _prevtoken=_curtoken;
128                         _curtoken=_SC('\n');
129                         NEXT();
130                         _currentcolumn=1;
131                         continue;
132                 case _SC('/'):
133                         NEXT();
134                         switch(CUR_CHAR){
135                         case _SC('*'):
136                                 NEXT();
137                                 LexBlockComment();
138                                 continue;       
139                         case _SC('/'):
140                                 do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
141                                 continue;
142                         case _SC('='):
143                                 NEXT();
144                                 RETURN_TOKEN(TK_DIVEQ);
145                                 continue;
146                         case _SC('>'):
147                                 NEXT();
148                                 RETURN_TOKEN(TK_ATTR_CLOSE);
149                                 continue;
150                         default:
151                                 RETURN_TOKEN('/');
152                         }
153                 case _SC('='):
154                         NEXT();
155                         if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
156                         else { NEXT(); RETURN_TOKEN(TK_EQ); }
157                 case _SC('<'):
158                         NEXT();
159                         if ( CUR_CHAR == _SC('=') ) { NEXT(); RETURN_TOKEN(TK_LE) }
160                         else if ( CUR_CHAR == _SC('-') ) { NEXT(); RETURN_TOKEN(TK_NEWSLOT); }
161                         else if ( CUR_CHAR == _SC('<') ) { NEXT(); RETURN_TOKEN(TK_SHIFTL); }
162                         else if ( CUR_CHAR == _SC('/') ) { NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); }
163                         //else if ( CUR_CHAR == _SC('[') ) { NEXT(); ReadMultilineString(); RETURN_TOKEN(TK_STRING_LITERAL); }
164                         else { RETURN_TOKEN('<') }
165                 case _SC('>'):
166                         NEXT();
167                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
168                         else if(CUR_CHAR == _SC('>')){ 
169                                 NEXT(); 
170                                 if(CUR_CHAR == _SC('>')){
171                                         NEXT();
172                                         RETURN_TOKEN(TK_USHIFTR);
173                                 }
174                                 RETURN_TOKEN(TK_SHIFTR);
175                         }
176                         else { RETURN_TOKEN('>') }
177                 case _SC('!'):
178                         NEXT();
179                         if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
180                         else { NEXT(); RETURN_TOKEN(TK_NE); }
181                 case _SC('@'): {
182                         int stype;
183                         NEXT(); 
184                         if(CUR_CHAR != _SC('"'))
185                                 Error(_SC("string expected"));
186                         if((stype=ReadString('"',true))!=-1) {
187                                 RETURN_TOKEN(stype);
188                         }
189                         Error(_SC("error parsing the string"));
190                                            }
191                 case _SC('"'):
192                 case _SC('\''): {
193                         int stype;
194                         if((stype=ReadString(CUR_CHAR,false))!=-1){
195                                 RETURN_TOKEN(stype);
196                         }
197                         Error(_SC("error parsing the string"));
198                         }
199                 case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
200                 case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'):
201                         {int ret = CUR_CHAR;
202                         NEXT(); RETURN_TOKEN(ret); }
203                 case _SC('.'):
204                         NEXT();
205                         if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
206                         NEXT();
207                         if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); }
208                         NEXT();
209                         RETURN_TOKEN(TK_VARPARAMS);
210                 case _SC('&'):
211                         NEXT();
212                         if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
213                         else { NEXT(); RETURN_TOKEN(TK_AND); }
214                 case _SC('|'):
215                         NEXT();
216                         if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
217                         else { NEXT(); RETURN_TOKEN(TK_OR); }
218                 case _SC(':'):
219                         NEXT();
220                         if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
221                         else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
222                 case _SC('*'):
223                         NEXT();
224                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
225                         else RETURN_TOKEN('*');
226                 case _SC('%'):
227                         NEXT();
228                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
229                         else RETURN_TOKEN('%');
230                 case _SC('-'):
231                         NEXT();
232                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
233                         else if  (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
234                         else RETURN_TOKEN('-');
235                 case _SC('+'):
236                         NEXT();
237                         if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
238                         else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
239                         else RETURN_TOKEN('+');
240                 case SQUIRREL_EOB:
241                         return 0;
242                 default:{
243                                 if (scisdigit(CUR_CHAR)) {
244                                         int ret = ReadNumber();
245                                         RETURN_TOKEN(ret);
246                                 }
247                                 else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
248                                         int t = ReadID();
249                                         RETURN_TOKEN(t);
250                                 }
251                                 else {
252                                         int c = CUR_CHAR;
253                                         if (sciscntrl(c)) Error(_SC("unexpected character(control)"));
254                                         NEXT();
255                                         RETURN_TOKEN(c);  
256                                 }
257                                 RETURN_TOKEN(0);
258                         }
259                 }
260         }
261         return 0;    
262 }
263         
264 int SQLexer::GetIDType(SQChar *s)
265 {
266         SQObjectPtr t;
267         if(_keywords->Get(SQString::Create(_sharedstate, s), t)) {
268                 return int(_integer(t));
269         }
270         return TK_IDENTIFIER;
271 }
272
273
274 int SQLexer::ReadString(int ndelim,bool verbatim)
275 {
276         INIT_TEMP_STRING();
277         NEXT();
278         if(IS_EOB()) return -1;
279         for(;;) {
280                 while(CUR_CHAR != ndelim) {
281                         switch(CUR_CHAR) {
282                         case SQUIRREL_EOB:
283                                 Error(_SC("unfinished string"));
284                                 return -1;
285                         case _SC('\n'): 
286                                 if(!verbatim) Error(_SC("newline in a constant")); 
287                                 APPEND_CHAR(CUR_CHAR); NEXT(); 
288                                 break;
289                         case _SC('\\'):
290                                 if(verbatim) {
291                                         APPEND_CHAR('\\'); NEXT(); 
292                                 }
293                                 else {
294                                         NEXT();
295                                         switch(CUR_CHAR) {
296                                         case _SC('x'): NEXT(); {
297                                                 if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected")); 
298                                                 const int maxdigits = 4;
299                                                 SQChar temp[maxdigits+1];
300                                                 int n = 0;
301                                                 while(isxdigit(CUR_CHAR) && n < maxdigits) {
302                                                         temp[n] = CUR_CHAR;
303                                                         n++;
304                                                         NEXT();
305                                                 }
306                                                 temp[n] = 0;
307                                                 SQChar *sTemp;
308                                                 APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
309                                         }
310                                     break;
311                                         case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
312                                         case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
313                                         case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
314                                         case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
315                                         case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
316                                         case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
317                                         case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
318                                         case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;
319                                         case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
320                                         case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
321                                         case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
322                                         default:
323                                                 Error(_SC("unrecognised escaper char"));
324                                         break;
325                                         }
326                                 }
327                                 break;
328                         default:
329                                 APPEND_CHAR(CUR_CHAR);
330                                 NEXT();
331                         }
332                 }
333                 NEXT();
334                 if(verbatim && CUR_CHAR == '"') { //double quotation
335                         APPEND_CHAR(CUR_CHAR);
336                         NEXT();
337                 }
338                 else {
339                         break;
340                 }
341         }
342         TERMINATE_BUFFER();
343         int len = _longstr.size()-1;
344         if(ndelim == _SC('\'')) {
345                 if(len == 0) Error(_SC("empty constant"));
346                 if(len > 1) Error(_SC("constant too long"));
347                 _nvalue = _longstr[0];
348                 return TK_INTEGER;
349         }
350         _svalue = &_longstr[0];
351         return TK_STRING_LITERAL;
352 }
353
// Returns 1 when c is the exponent marker of a floating-point literal
// ('e' or 'E'), otherwise 0.
int isexponent(int c)
{
	switch(c) {
	case 'e':
	case 'E':
		return 1;
	default:
		return 0;
	}
}
355
356 int SQLexer::ReadNumber()
357 {
358 #define TINT 1
359 #define TFLOAT 2
360 #define THEX 3
361 #define TSCIENTIFIC 4
362         int type = TINT, firstchar = CUR_CHAR;
363         bool isfloat = false;
364         SQChar *sTemp;
365         INIT_TEMP_STRING();
366         NEXT();
367         if(firstchar == _SC('0') && toupper(CUR_CHAR) == _SC('X')) {
368                 NEXT();
369                 type = THEX;
370                 while(isxdigit(CUR_CHAR)) {
371                         APPEND_CHAR(CUR_CHAR);
372                         NEXT();
373                 }
374                 if(_longstr.size() > 8) Error(_SC("Hex number over 8 digits"));
375         }
376         else {
377                 APPEND_CHAR(firstchar);
378                 while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
379             if(CUR_CHAR == _SC('.')) type = TFLOAT;
380                         if(isexponent(CUR_CHAR)) {
381                                 if(type != TFLOAT) Error(_SC("invalid numeric format"));
382                                 type = TSCIENTIFIC;
383                                 APPEND_CHAR(CUR_CHAR);
384                                 NEXT();
385                                 if(CUR_CHAR == '+' || CUR_CHAR == '-'){
386                                         APPEND_CHAR(CUR_CHAR);
387                                         NEXT();
388                                 }
389                                 if(!scisdigit(CUR_CHAR)) Error(_SC("exponent expected"));
390                         }
391                         
392                         APPEND_CHAR(CUR_CHAR);
393                         NEXT();
394                 }
395         }
396         TERMINATE_BUFFER();
397         switch(type) {
398         case TSCIENTIFIC:
399         case TFLOAT:
400                 _fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);
401                 return TK_FLOAT;
402         case TINT:
403                 _nvalue = (SQInteger)scatoi(&_longstr[0]);
404                 return TK_INTEGER;
405         case THEX:
406                 *((unsigned long *)&_nvalue) = scstrtoul(&_longstr[0],&sTemp,16);
407                 return TK_INTEGER;
408         }
409         return 0;
410 }
411
412 int SQLexer::ReadID()
413 {
414         int res, size = 0;
415         INIT_TEMP_STRING();
416         do {
417                 APPEND_CHAR(CUR_CHAR);
418                 NEXT();
419         } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
420         TERMINATE_BUFFER();
421         res = GetIDType(&_longstr[0]);
422         if(res == TK_IDENTIFIER) {
423                 _svalue = &_longstr[0];
424         }
425         return res;
426 }