7244e73acd6971bcccc9456b42e32548e122063c
[supertux.git] / src / squirrel / squirrel / sqlexer.cpp
1 /*\r
2         see copyright notice in squirrel.h\r
3 */\r
4 #include "sqpcheader.h"\r
5 #include <ctype.h>\r
6 #include <stdlib.h>\r
7 #include "sqtable.h"\r
8 #include "sqstring.h"\r
9 #include "sqcompiler.h"\r
10 #include "sqlexer.h"\r
11 \r
// Shorthands used by the SQLexer member functions below. They expand in
// place and operate directly on the lexer's members.

// The character the lexer is currently looking at.
#define CUR_CHAR (_currdata)
// Move the current token into _prevtoken, record t as the current token and return it.
#define RETURN_TOKEN(t) { _prevtoken = _curtoken; _curtoken = t; return t; }
// True when the current character is at (or below) the end-of-buffer marker.
#define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)
// Read the next character and advance the column counter.
#define NEXT() { Next(); ++_currentcolumn; }
// Empty the scratch buffer used to accumulate identifiers, numbers and strings.
#define INIT_TEMP_STRING() { _longstr.resize(0); }
// Append one character to the scratch buffer.
#define APPEND_CHAR(c) { _longstr.push_back(c); }
// Append the terminating NUL to the scratch buffer.
#define TERMINATE_BUFFER() { _longstr.push_back(_SC('\0')); }
// Add the keyword spelled `key` to the keyword table with token code `id`
// (expects a variable named `ss` to be in scope).
#define ADD_KEYWORD(key,id) _keywords->NewSlot(SQString::Create(ss, _SC(#key)), SQInteger(id))
20 \r
21 SQLexer::SQLexer(){}\r
22 SQLexer::~SQLexer()\r
23 {\r
24         _keywords->Release();\r
25 }\r
26 \r
27 void SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)\r
28 {\r
29         _errfunc = efunc;\r
30         _errtarget = ed;\r
31         _sharedstate = ss;\r
32         _keywords = SQTable::Create(ss, 26);\r
33         ADD_KEYWORD(while, TK_WHILE);\r
34         ADD_KEYWORD(do, TK_DO);\r
35         ADD_KEYWORD(if, TK_IF);\r
36         ADD_KEYWORD(else, TK_ELSE);\r
37         ADD_KEYWORD(break, TK_BREAK);\r
38         ADD_KEYWORD(continue, TK_CONTINUE);\r
39         ADD_KEYWORD(return, TK_RETURN);\r
40         ADD_KEYWORD(null, TK_NULL);\r
41         ADD_KEYWORD(function, TK_FUNCTION);\r
42         ADD_KEYWORD(local, TK_LOCAL);\r
43         ADD_KEYWORD(for, TK_FOR);\r
44         ADD_KEYWORD(foreach, TK_FOREACH);\r
45         ADD_KEYWORD(in, TK_IN);\r
46         ADD_KEYWORD(typeof, TK_TYPEOF);\r
47         ADD_KEYWORD(delegate, TK_DELEGATE);\r
48         ADD_KEYWORD(delete, TK_DELETE);\r
49         ADD_KEYWORD(try, TK_TRY);\r
50         ADD_KEYWORD(catch, TK_CATCH);\r
51         ADD_KEYWORD(throw, TK_THROW);\r
52         ADD_KEYWORD(clone, TK_CLONE);\r
53         ADD_KEYWORD(yield, TK_YIELD);\r
54         ADD_KEYWORD(resume, TK_RESUME);\r
55         ADD_KEYWORD(switch, TK_SWITCH);\r
56         ADD_KEYWORD(case, TK_CASE);\r
57         ADD_KEYWORD(default, TK_DEFAULT);\r
58         ADD_KEYWORD(this, TK_THIS);\r
59         ADD_KEYWORD(parent,TK_PARENT);\r
60         ADD_KEYWORD(class,TK_CLASS);\r
61         ADD_KEYWORD(extends,TK_EXTENDS);\r
62         ADD_KEYWORD(constructor,TK_CONSTRUCTOR);\r
63         ADD_KEYWORD(instanceof,TK_INSTANCEOF);\r
64         ADD_KEYWORD(vargc,TK_VARGC);\r
65         ADD_KEYWORD(vargv,TK_VARGV);\r
66         ADD_KEYWORD(true,TK_TRUE);\r
67         ADD_KEYWORD(false,TK_FALSE);\r
68         ADD_KEYWORD(static,TK_STATIC);\r
69 \r
70         _readf = rg;\r
71         _up = up;\r
72         _lasttokenline = _currentline = 1;\r
73         _currentcolumn = 0;\r
74         _prevtoken = -1;\r
75         Next();\r
76 }\r
77 \r
78 void SQLexer::Error(const SQChar *err)\r
79 {\r
80         _errfunc(_errtarget,err);\r
81 }\r
82 \r
83 void SQLexer::Next()\r
84 {\r
85         SQInteger t = _readf(_up);\r
86         if(t > MAX_CHAR) Error(_SC("Invalid character"));\r
87         if(t != 0) {\r
88                 _currdata = (LexChar)t;\r
89                 return;\r
90         }\r
91         _currdata = SQUIRREL_EOB;\r
92 }\r
93 \r
94 const SQChar *SQLexer::Tok2Str(SQInteger tok)\r
95 {\r
96         SQObjectPtr itr, key, val;\r
97         SQInteger nitr;\r
98         while((nitr = _keywords->Next(false,itr, key, val)) != -1) {\r
99                 itr = (SQInteger)nitr;\r
100                 if(((SQInteger)_integer(val)) == tok)\r
101                         return _stringval(key);\r
102         }\r
103         return NULL;\r
104 }\r
105 \r
106 void SQLexer::LexBlockComment()\r
107 {\r
108         bool done = false;\r
109         while(!done) {\r
110                 switch(CUR_CHAR) {\r
111                         case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;\r
112                         case _SC('\n'): _currentline++; NEXT(); continue;\r
113                         case SQUIRREL_EOB: Error(_SC("missing \"*/\" in comment"));\r
114                         default: NEXT();\r
115                 }\r
116         }\r
117 }\r
118 \r
// Scans forward from the current character and returns the code of the next
// token, or 0 once the end of the buffer is reached.
//
// Blanks and comments are skipped. A newline is not returned either, but it
// is recorded as the current token, so the token returned after it sees
// '\n' as its _prevtoken. _lasttokenline is set to the line on which this
// call starts scanning.
//
// Single-character tokens are returned as their character code; multi-character
// operators, literals, identifiers and keywords are returned as TK_* codes.
SQInteger SQLexer::Lex()
{
	_lasttokenline = _currentline;
	while(CUR_CHAR != SQUIRREL_EOB) {
		switch(CUR_CHAR){
		// blanks carry no meaning: skip them
		case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
		case _SC('\n'):
			// not returned, but remembered as the current token
			_currentline++;
			_prevtoken=_curtoken;
			_curtoken=_SC('\n');
			NEXT();
			_currentcolumn=1;
			continue;
		case _SC('/'):
			// '/' may start a comment, "/=", "/>" or be a plain division sign
			NEXT();
			switch(CUR_CHAR){
			case _SC('*'):
				NEXT();
				LexBlockComment();
				continue;
			case _SC('/'):
				// line comment: skip up to (not including) the newline or end of buffer
				do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
				continue;
			case _SC('='):
				NEXT();
				RETURN_TOKEN(TK_DIVEQ);
				continue;
			case _SC('>'):
				NEXT();
				RETURN_TOKEN(TK_ATTR_CLOSE);
				continue;
			default:
				RETURN_TOKEN('/');
			}
		case _SC('='):
			// "=" or "=="
			NEXT();
			if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
			else { NEXT(); RETURN_TOKEN(TK_EQ); }
		case _SC('<'):
			// "<", "<=", "<-", "<<" or "</"
			NEXT();
			if ( CUR_CHAR == _SC('=') ) { NEXT(); RETURN_TOKEN(TK_LE) }
			else if ( CUR_CHAR == _SC('-') ) { NEXT(); RETURN_TOKEN(TK_NEWSLOT); }
			else if ( CUR_CHAR == _SC('<') ) { NEXT(); RETURN_TOKEN(TK_SHIFTL); }
			else if ( CUR_CHAR == _SC('/') ) { NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); }
			//else if ( CUR_CHAR == _SC('[') ) { NEXT(); ReadMultilineString(); RETURN_TOKEN(TK_STRING_LITERAL); }
			else { RETURN_TOKEN('<') }
		case _SC('>'):
			// ">", ">=", ">>" or ">>>"
			NEXT();
			if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
			else if(CUR_CHAR == _SC('>')){
				NEXT();
				if(CUR_CHAR == _SC('>')){
					NEXT();
					RETURN_TOKEN(TK_USHIFTR);
				}
				RETURN_TOKEN(TK_SHIFTR);
			}
			else { RETURN_TOKEN('>') }
		case _SC('!'):
			// "!" or "!="
			NEXT();
			if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
			else { NEXT(); RETURN_TOKEN(TK_NE); }
		case _SC('@'): {
			// '@' must be followed by a double-quoted string, which is read
			// in verbatim mode
			SQInteger stype;
			NEXT();
			if(CUR_CHAR != _SC('"'))
				Error(_SC("string expected"));
			if((stype=ReadString('"',true))!=-1) {
				RETURN_TOKEN(stype);
			}
			// NOTE: nothing follows this call, so if the error callback
			// returns, execution falls through into the next case.
			Error(_SC("error parsing the string"));
			}
		case _SC('"'):
		case _SC('\''): {
			// ordinary string or character constant; the opening quote is
			// passed to ReadString as the delimiter
			SQInteger stype;
			if((stype=ReadString(CUR_CHAR,false))!=-1){
				RETURN_TOKEN(stype);
			}
			// NOTE: as above, falls through into the next case if the
			// error callback returns.
			Error(_SC("error parsing the string"));
			}
		// punctuation returned as its own character code
		case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
		case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'):
			{SQInteger ret = CUR_CHAR;
			NEXT(); RETURN_TOKEN(ret); }
		case _SC('.'):
			// "." or "..."; exactly two dots are reported as an error
			NEXT();
			if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
			NEXT();
			if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); }
			NEXT();
			RETURN_TOKEN(TK_VARPARAMS);
		case _SC('&'):
			// "&" or "&&"
			NEXT();
			if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
			else { NEXT(); RETURN_TOKEN(TK_AND); }
		case _SC('|'):
			// "|" or "||"
			NEXT();
			if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
			else { NEXT(); RETURN_TOKEN(TK_OR); }
		case _SC(':'):
			// ":" or "::"
			NEXT();
			if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
			else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
		case _SC('*'):
			// "*" or "*="
			NEXT();
			if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
			else RETURN_TOKEN('*');
		case _SC('%'):
			// "%" or "%="
			NEXT();
			if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
			else RETURN_TOKEN('%');
		case _SC('-'):
			// "-", "-=" or "--"
			NEXT();
			if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
			else if  (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
			else RETURN_TOKEN('-');
		case _SC('+'):
			// "+", "+=" or "++"
			NEXT();
			if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
			else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
			else RETURN_TOKEN('+');
		case SQUIRREL_EOB:
			return 0;
		default:{
				// numbers, identifiers/keywords, or any other single character
				if (scisdigit(CUR_CHAR)) {
					SQInteger ret = ReadNumber();
					RETURN_TOKEN(ret);
				}
				else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
					SQInteger t = ReadID();
					RETURN_TOKEN(t);
				}
				else {
					// anything else is returned as its own character code,
					// after control characters have been reported
					SQInteger c = CUR_CHAR;
					if (sciscntrl((int)c)) Error(_SC("unexpected character(control)"));
					NEXT();
					RETURN_TOKEN(c);
				}
				// every branch above returns, so this is not reached
				RETURN_TOKEN(0);
			}
		}
	}
	return 0;
}
263         \r
264 SQInteger SQLexer::GetIDType(SQChar *s)\r
265 {\r
266         SQObjectPtr t;\r
267         if(_keywords->Get(SQString::Create(_sharedstate, s), t)) {\r
268                 return SQInteger(_integer(t));\r
269         }\r
270         return TK_IDENTIFIER;\r
271 }\r
272 \r
273 \r
274 SQInteger SQLexer::ReadString(SQInteger ndelim,bool verbatim)\r
275 {\r
276         INIT_TEMP_STRING();\r
277         NEXT();\r
278         if(IS_EOB()) return -1;\r
279         for(;;) {\r
280                 while(CUR_CHAR != ndelim) {\r
281                         switch(CUR_CHAR) {\r
282                         case SQUIRREL_EOB:\r
283                                 Error(_SC("unfinished string"));\r
284                                 return -1;\r
285                         case _SC('\n'): \r
286                                 if(!verbatim) Error(_SC("newline in a constant")); \r
287                                 APPEND_CHAR(CUR_CHAR); NEXT(); \r
288                                 _currentline++;\r
289                                 break;\r
290                         case _SC('\\'):\r
291                                 if(verbatim) {\r
292                                         APPEND_CHAR('\\'); NEXT(); \r
293                                 }\r
294                                 else {\r
295                                         NEXT();\r
296                                         switch(CUR_CHAR) {\r
297                                         case _SC('x'): NEXT(); {\r
298                                                 if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected")); \r
299                                                 const SQInteger maxdigits = 4;\r
300                                                 SQChar temp[maxdigits+1];\r
301                                                 SQInteger n = 0;\r
302                                                 while(isxdigit(CUR_CHAR) && n < maxdigits) {\r
303                                                         temp[n] = CUR_CHAR;\r
304                                                         n++;\r
305                                                         NEXT();\r
306                                                 }\r
307                                                 temp[n] = 0;\r
308                                                 SQChar *sTemp;\r
309                                                 APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));\r
310                                         }\r
311                                     break;\r
312                                         case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;\r
313                                         case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;\r
314                                         case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;\r
315                                         case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;\r
316                                         case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;\r
317                                         case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;\r
318                                         case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;\r
319                                         case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;\r
320                                         case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;\r
321                                         case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;\r
322                                         case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;\r
323                                         default:\r
324                                                 Error(_SC("unrecognised escaper char"));\r
325                                         break;\r
326                                         }\r
327                                 }\r
328                                 break;\r
329                         default:\r
330                                 APPEND_CHAR(CUR_CHAR);\r
331                                 NEXT();\r
332                         }\r
333                 }\r
334                 NEXT();\r
335                 if(verbatim && CUR_CHAR == '"') { //double quotation\r
336                         APPEND_CHAR(CUR_CHAR);\r
337                         NEXT();\r
338                 }\r
339                 else {\r
340                         break;\r
341                 }\r
342         }\r
343         TERMINATE_BUFFER();\r
344         SQInteger len = _longstr.size()-1;\r
345         if(ndelim == _SC('\'')) {\r
346                 if(len == 0) Error(_SC("empty constant"));\r
347                 if(len > 1) Error(_SC("constant too long"));\r
348                 _nvalue = _longstr[0];\r
349                 return TK_INTEGER;\r
350         }\r
351         _svalue = &_longstr[0];\r
352         return TK_STRING_LITERAL;\r
353 }\r
354 \r
355 void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)\r
356 {\r
357         *res = 0;\r
358         while(*s != 0)\r
359         {\r
360                 if(scisdigit(*s)) *res = (*res)*16+((*s++)-'0');\r
361                 else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-'A'+10);\r
362                 else { assert(0); }\r
363         }\r
364 }\r
365 \r
366 void LexInteger(const SQChar *s,SQUnsignedInteger *res)\r
367 {\r
368         *res = 0;\r
369         while(*s != 0)\r
370         {\r
371                 *res = (*res)*10+((*s++)-'0');\r
372         }\r
373 }\r
374 \r
375 SQInteger isexponent(SQInteger c) { return c == 'e' || c=='E'; }\r
376 #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)\r
377 SQInteger SQLexer::ReadNumber()\r
378 {\r
379 #define TINT 1\r
380 #define TFLOAT 2\r
381 #define THEX 3\r
382 #define TSCIENTIFIC 4\r
383         SQInteger type = TINT, firstchar = CUR_CHAR;\r
384         SQChar *sTemp;\r
385         INIT_TEMP_STRING();\r
386         NEXT();\r
387         if(firstchar == _SC('0') && toupper(CUR_CHAR) == _SC('X')) {\r
388                 NEXT();\r
389                 type = THEX;\r
390                 while(isxdigit(CUR_CHAR)) {\r
391                         APPEND_CHAR(CUR_CHAR);\r
392                         NEXT();\r
393                 }\r
394                 if(_longstr.size() > MAX_HEX_DIGITS) Error(_SC("too many digits for an Hex number"));\r
395         }\r
396         else {\r
397                 APPEND_CHAR((int)firstchar);\r
398                 while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {\r
399             if(CUR_CHAR == _SC('.')) type = TFLOAT;\r
400                         if(isexponent(CUR_CHAR)) {\r
401                                 if(type != TFLOAT) Error(_SC("invalid numeric format"));\r
402                                 type = TSCIENTIFIC;\r
403                                 APPEND_CHAR(CUR_CHAR);\r
404                                 NEXT();\r
405                                 if(CUR_CHAR == '+' || CUR_CHAR == '-'){\r
406                                         APPEND_CHAR(CUR_CHAR);\r
407                                         NEXT();\r
408                                 }\r
409                                 if(!scisdigit(CUR_CHAR)) Error(_SC("exponent expected"));\r
410                         }\r
411                         \r
412                         APPEND_CHAR(CUR_CHAR);\r
413                         NEXT();\r
414                 }\r
415         }\r
416         TERMINATE_BUFFER();\r
417         switch(type) {\r
418         case TSCIENTIFIC:\r
419         case TFLOAT:\r
420                 _fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);\r
421                 return TK_FLOAT;\r
422         case TINT:\r
423                 LexInteger(&_longstr[0],(SQUnsignedInteger *)&_nvalue);\r
424                 return TK_INTEGER;\r
425         case THEX:\r
426                 LexHexadecimal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);\r
427                 return TK_INTEGER;\r
428         }\r
429         return 0;\r
430 }\r
431 \r
432 SQInteger SQLexer::ReadID()\r
433 {\r
434         SQInteger res;\r
435         INIT_TEMP_STRING();\r
436         do {\r
437                 APPEND_CHAR(CUR_CHAR);\r
438                 NEXT();\r
439         } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));\r
440         TERMINATE_BUFFER();\r
441         res = GetIDType(&_longstr[0]);\r
442         if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR) {\r
443                 _svalue = &_longstr[0];\r
444         }\r
445         return res;\r
446 }\r