9c3c2685d3d9df48382881c5e999bddd8011bfb9
[supertux.git] / src / squirrel / sqstdlib / sqstdrex.cpp
1 /* see copyright notice in squirrel.h */\r
2 #include <squirrel.h>\r
3 #include <string.h>\r
4 #include <ctype.h>\r
5 #include <setjmp.h>\r
6 #include "sqstdstring.h"\r
7 \r
8 #ifdef _UINCODE\r
9 #define scisprint iswprint\r
10 #else\r
11 #define scisprint isprint\r
12 #endif\r
13 \r
14 #ifdef _DEBUG\r
15 #include <stdio.h>\r
16 \r
17 static const SQChar *g_nnames[] =\r
18 {\r
19         _SC("NONE"),_SC("OP_GREEDY"),   _SC("OP_OR"),\r
20         _SC("OP_EXPR"),_SC("OP_NOCAPEXPR"),_SC("OP_DOT"),       _SC("OP_CLASS"),\r
21         _SC("OP_CCLASS"),_SC("OP_NCLASS"),_SC("OP_RANGE"),_SC("OP_CHAR"),\r
22         _SC("OP_EOL"),_SC("OP_BOL"),_SC("OP_WB")\r
23 };\r
24 \r
25 #endif\r
26 \r
27 #define OP_GREEDY               MAX_CHAR+1 // * + ? {n}\r
28 #define OP_OR                   MAX_CHAR+2\r
29 #define OP_EXPR                 MAX_CHAR+3 //parentesis ()\r
30 #define OP_NOCAPEXPR    MAX_CHAR+4 //parentesis (?:)\r
31 #define OP_DOT                  MAX_CHAR+5\r
32 #define OP_CLASS                MAX_CHAR+6\r
33 #define OP_CCLASS               MAX_CHAR+7\r
34 #define OP_NCLASS               MAX_CHAR+8 //negates class the [^\r
35 #define OP_RANGE                MAX_CHAR+9\r
36 #define OP_CHAR                 MAX_CHAR+10\r
37 #define OP_EOL                  MAX_CHAR+11\r
38 #define OP_BOL                  MAX_CHAR+12\r
39 #define OP_WB                   MAX_CHAR+13\r
40 \r
41 #define SQREX_SYMBOL_ANY_CHAR '.'\r
42 #define SQREX_SYMBOL_GREEDY_ONE_OR_MORE '+'\r
43 #define SQREX_SYMBOL_GREEDY_ZERO_OR_MORE '*'\r
44 #define SQREX_SYMBOL_GREEDY_ZERO_OR_ONE '?'\r
45 #define SQREX_SYMBOL_BRANCH '|'\r
46 #define SQREX_SYMBOL_END_OF_STRING '$'\r
47 #define SQREX_SYMBOL_BEGINNING_OF_STRING '^'\r
48 #define SQREX_SYMBOL_ESCAPE_CHAR '\\'\r
49 \r
50 \r
51 typedef int SQRexNodeType;\r
52 \r
53 typedef struct tagSQRexNode{\r
54         SQRexNodeType type;\r
55         SQInteger left;\r
56         SQInteger right;\r
57         SQInteger next;\r
58 }SQRexNode;\r
59 \r
60 struct SQRex{\r
61         const SQChar *_eol;\r
62         const SQChar *_bol;\r
63         const SQChar *_p;\r
64         SQInteger _first;\r
65         SQInteger _op;\r
66         SQRexNode *_nodes;\r
67         SQInteger _nallocated;\r
68         SQInteger _nsize;\r
69         SQInteger _nsubexpr;\r
70         SQRexMatch *_matches;\r
71         SQInteger _currsubexp;\r
72         void *_jmpbuf;\r
73         const SQChar **_error;\r
74 };\r
75 \r
76 static SQInteger sqstd_rex_list(SQRex *exp);\r
77 \r
78 static SQInteger sqstd_rex_newnode(SQRex *exp, SQRexNodeType type)\r
79 {\r
80         SQRexNode n;\r
81         n.type = type;\r
82         n.next = n.right = n.left = -1;\r
83         if(type == OP_EXPR)\r
84                 n.right = exp->_nsubexpr++;\r
85         if(exp->_nallocated < (exp->_nsize + 1)) {\r
86                 SQInteger oldsize = exp->_nallocated;\r
87                 exp->_nallocated *= 2;\r
88                 exp->_nodes = (SQRexNode *)sq_realloc(exp->_nodes, oldsize * sizeof(SQRexNode) ,exp->_nallocated * sizeof(SQRexNode));\r
89         }\r
90         exp->_nodes[exp->_nsize++] = n;\r
91         return (SQInteger)exp->_nsize - 1;\r
92 }\r
93 \r
94 static void sqstd_rex_error(SQRex *exp,const SQChar *error)\r
95 {\r
96         if(exp->_error) *exp->_error = error;\r
97         longjmp(*((jmp_buf*)exp->_jmpbuf),-1);\r
98 }\r
99 \r
100 static void sqstd_rex_expect(SQRex *exp, SQInteger n){\r
101         if((*exp->_p) != n) \r
102                 sqstd_rex_error(exp, _SC("expected paren"));\r
103         exp->_p++;\r
104 }\r
105 \r
106 /*static SQBool sqstd_rex_ischar(SQChar c)\r
107 {\r
108         switch(c) {\r
109         case SQREX_SYMBOL_BRANCH:case SQREX_SYMBOL_GREEDY_ZERO_OR_MORE:\r
110         case SQREX_SYMBOL_GREEDY_ZERO_OR_ONE:case SQREX_SYMBOL_GREEDY_ONE_OR_MORE:\r
111         case SQREX_SYMBOL_BEGINNING_OF_STRING:case SQREX_SYMBOL_END_OF_STRING:\r
112         case SQREX_SYMBOL_ANY_CHAR:case SQREX_SYMBOL_ESCAPE_CHAR:case '(':case ')':case '[':case '{': case '}':\r
113                 return SQFalse;\r
114     }\r
115         return SQTrue;\r
116 }*/\r
117 \r
118 static SQChar sqstd_rex_escapechar(SQRex *exp)\r
119 {\r
120         if(*exp->_p == SQREX_SYMBOL_ESCAPE_CHAR){\r
121                 exp->_p++;\r
122                 switch(*exp->_p) {\r
123                 case 'v': exp->_p++; return '\v';\r
124                 case 'n': exp->_p++; return '\n';\r
125                 case 't': exp->_p++; return '\t';\r
126                 case 'r': exp->_p++; return '\r';\r
127                 case 'f': exp->_p++; return '\f';\r
128                 default: return (*exp->_p++);\r
129                 }\r
130         } else if(!scisprint(*exp->_p)) sqstd_rex_error(exp,_SC("letter expected"));\r
131         return (*exp->_p++);\r
132 }\r
133 \r
134 static SQInteger sqstd_rex_charclass(SQRex *exp,SQInteger classid)\r
135 {\r
136         SQInteger n = sqstd_rex_newnode(exp,OP_CCLASS);\r
137         exp->_nodes[n].left = classid;\r
138         return n;\r
139 }\r
140 \r
141 static SQInteger sqstd_rex_charnode(SQRex *exp,SQBool isclass)\r
142 {\r
143         if(*exp->_p == SQREX_SYMBOL_ESCAPE_CHAR) {\r
144                 exp->_p++;\r
145                 switch(*exp->_p) {\r
146                         case 'n': exp->_p++; return sqstd_rex_newnode(exp,'\n');\r
147                         case 't': exp->_p++; return sqstd_rex_newnode(exp,'\t');\r
148                         case 'r': exp->_p++; return sqstd_rex_newnode(exp,'\r');\r
149                         case 'f': exp->_p++; return sqstd_rex_newnode(exp,'\f');\r
150                         case 'v': exp->_p++; return sqstd_rex_newnode(exp,'\v');\r
151                         case 'a': case 'A': case 'w': case 'W': case 's': case 'S': \r
152                         case 'd': case 'D': case 'x': case 'X': case 'c': case 'C': \r
153                         case 'p': case 'P': case 'l': case 'u': \r
154                                 {\r
155                                 SQChar t = *exp->_p;\r
156                                 exp->_p++; \r
157                                 return sqstd_rex_charclass(exp,t);\r
158                                 }\r
159                         case 'b': \r
160                         case 'B':\r
161                                 if(!isclass) {\r
162                                         SQInteger node = sqstd_rex_newnode(exp,OP_WB);\r
163                                         exp->_nodes[node].left = *exp->_p;\r
164                                         exp->_p++; \r
165                                         return node;\r
166                                 } //else default\r
167                         default: return sqstd_rex_newnode(exp,(*exp->_p++));\r
168                 }\r
169         }\r
170         else if(!scisprint(*exp->_p)) {\r
171                 \r
172                 sqstd_rex_error(exp,_SC("letter expected"));\r
173         }\r
174         return sqstd_rex_newnode(exp,*exp->_p++);\r
175 }\r
176 static SQInteger sqstd_rex_class(SQRex *exp)\r
177 {\r
178         SQInteger ret = -1;\r
179         SQInteger first = -1,chain;\r
180         if(*exp->_p == SQREX_SYMBOL_BEGINNING_OF_STRING){\r
181                 ret = sqstd_rex_newnode(exp,OP_NCLASS);\r
182                 exp->_p++;\r
183         }else ret = sqstd_rex_newnode(exp,OP_CLASS);\r
184         \r
185         if(*exp->_p == ']') sqstd_rex_error(exp,_SC("empty class"));\r
186         chain = ret;\r
187         while(*exp->_p != ']' && exp->_p != exp->_eol) {\r
188                 if(*exp->_p == '-' && first != -1){ \r
189                         SQInteger r;\r
190                         if(*exp->_p++ == ']') sqstd_rex_error(exp,_SC("unfinished range"));\r
191                         r = sqstd_rex_newnode(exp,OP_RANGE);\r
192                         if(first>*exp->_p) sqstd_rex_error(exp,_SC("invalid range"));\r
193                         if(exp->_nodes[first].type == OP_CCLASS) sqstd_rex_error(exp,_SC("cannot use character classes in ranges"));\r
194                         exp->_nodes[r].left = exp->_nodes[first].type;\r
195                         exp->_nodes[r].right = sqstd_rex_escapechar(exp);\r
196             exp->_nodes[chain].next = r;\r
197                         chain = r;\r
198                         first = -1;\r
199                 }\r
200                 else{\r
201                         if(first!=-1){\r
202                                 SQInteger c = first;\r
203                                 exp->_nodes[chain].next = c;\r
204                                 chain = c;\r
205                                 first = sqstd_rex_charnode(exp,SQTrue);\r
206                         }\r
207                         else{\r
208                                 first = sqstd_rex_charnode(exp,SQTrue);\r
209                         }\r
210                 }\r
211         }\r
212         if(first!=-1){\r
213                 SQInteger c = first;\r
214                 exp->_nodes[chain].next = c;\r
215                 chain = c;\r
216                 first = -1;\r
217         }\r
218         /* hack? */\r
219         exp->_nodes[ret].left = exp->_nodes[ret].next;\r
220         exp->_nodes[ret].next = -1;\r
221         return ret;\r
222 }\r
223 \r
224 static SQInteger sqstd_rex_parsenumber(SQRex *exp)\r
225 {\r
226         SQInteger ret = *exp->_p-'0';\r
227         SQInteger positions = 10;\r
228         exp->_p++;\r
229         while(isdigit(*exp->_p)) {\r
230                 ret = ret*10+(*exp->_p++-'0');\r
231                 if(positions==1000000000) sqstd_rex_error(exp,_SC("overflow in numeric constant"));\r
232                 positions *= 10;\r
233         };\r
234         return ret;\r
235 }\r
236 \r
237 static SQInteger sqstd_rex_element(SQRex *exp)\r
238 {\r
239         SQInteger ret;\r
240         switch(*exp->_p)\r
241         {\r
242         case '(': {\r
243                 SQInteger expr;\r
244                 exp->_p++;\r
245                 \r
246                 \r
247                 if(*exp->_p =='?') {\r
248                         exp->_p++;\r
249                         sqstd_rex_expect(exp,':');\r
250                         expr = sqstd_rex_newnode(exp,OP_NOCAPEXPR);\r
251                 }\r
252                 else\r
253                         expr = sqstd_rex_newnode(exp,OP_EXPR);\r
254                 exp->_nodes[expr].left = sqstd_rex_list(exp);\r
255                 ret = expr;\r
256                 sqstd_rex_expect(exp,')');\r
257         }\r
258                 break;\r
259         case '[':\r
260                 exp->_p++;\r
261                 ret = sqstd_rex_class(exp);\r
262                 sqstd_rex_expect(exp,']');\r
263                 break;\r
264         case SQREX_SYMBOL_END_OF_STRING: exp->_p++; ret = sqstd_rex_newnode(exp,OP_EOL);break;\r
265         case SQREX_SYMBOL_ANY_CHAR: exp->_p++; ret = sqstd_rex_newnode(exp,OP_DOT);break;\r
266         default:\r
267                 ret = sqstd_rex_charnode(exp,SQFalse);\r
268                 break;\r
269         }\r
270         /* scope block */\r
271         {\r
272                 SQInteger op;\r
273                 unsigned short p0 = 0, p1 = 0;\r
274                 switch(*exp->_p){\r
275                 case SQREX_SYMBOL_GREEDY_ZERO_OR_MORE: p0 = 0; p1 = 0xFFFF; exp->_p++; goto __end;\r
276                 case SQREX_SYMBOL_GREEDY_ONE_OR_MORE: p0 = 1; p1 = 0xFFFF; exp->_p++; goto __end;\r
277                 case SQREX_SYMBOL_GREEDY_ZERO_OR_ONE: p0 = 0; p1 = 1; exp->_p++; goto __end;\r
278                 case '{':{\r
279                         exp->_p++;\r
280                         if(!isdigit(*exp->_p)) sqstd_rex_error(exp,_SC("number expected"));\r
281                         p0 = (unsigned short)sqstd_rex_parsenumber(exp);\r
282                         switch(*exp->_p) {\r
283                         case '}':\r
284                                 p1 = p0; exp->_p++;\r
285                                 goto __end;\r
286                         case ',':\r
287                                 exp->_p++;\r
288                                 p1 = 0xFFFF;\r
289                                 if(isdigit(*exp->_p)){\r
290                                         p1 = (unsigned short)sqstd_rex_parsenumber(exp);\r
291                                 }\r
292                                 sqstd_rex_expect(exp,'}');\r
293                                 goto __end;\r
294                         default:\r
295                                 sqstd_rex_error(exp,_SC(", or } expected"));\r
296                         }\r
297                 }\r
298                 __end: {\r
299                                 SQInteger nnode = sqstd_rex_newnode(exp,OP_GREEDY);\r
300                                 op = OP_GREEDY;\r
301                                 exp->_nodes[nnode].left = ret;\r
302                                 exp->_nodes[nnode].right = ((p0)<<16)|p1;\r
303                                 ret = nnode;\r
304                         }\r
305                 }\r
306         }\r
307         if(*exp->_p != SQREX_SYMBOL_BRANCH && *exp->_p != ')' && *exp->_p != SQREX_SYMBOL_GREEDY_ZERO_OR_MORE && *exp->_p != SQREX_SYMBOL_GREEDY_ONE_OR_MORE && *exp->_p != '\0')\r
308                 exp->_nodes[ret].next = sqstd_rex_element(exp);\r
309         return ret;\r
310 }\r
311 \r
312 static SQInteger sqstd_rex_list(SQRex *exp)\r
313 {\r
314         SQInteger ret=-1,e;\r
315         if(*exp->_p == SQREX_SYMBOL_BEGINNING_OF_STRING) {\r
316                 exp->_p++;\r
317                 ret = sqstd_rex_newnode(exp,OP_BOL);\r
318         }\r
319         e = sqstd_rex_element(exp);\r
320         if(ret != -1) {\r
321                 exp->_nodes[ret].next = e;\r
322         }\r
323         else ret = e;\r
324 \r
325         if(*exp->_p == SQREX_SYMBOL_BRANCH) {\r
326                 SQInteger temp;\r
327                 exp->_p++;\r
328                 temp = sqstd_rex_newnode(exp,OP_OR);\r
329                 exp->_nodes[temp].left = ret;\r
330                 exp->_nodes[temp].right = sqstd_rex_list(exp);\r
331                 ret = temp;\r
332         }\r
333         return ret;\r
334 }\r
335 \r
336 static SQBool sqstd_rex_matchcclass(SQInteger cclass,SQChar c)\r
337 {\r
338         switch(cclass) {\r
339         case 'a': return isalpha(c)?SQTrue:SQFalse;\r
340         case 'A': return !isalpha(c)?SQTrue:SQFalse;\r
341         case 'w': return (isalnum(c) || c == '_')?SQTrue:SQFalse;\r
342         case 'W': return (!isalnum(c) && c != '_')?SQTrue:SQFalse;\r
343         case 's': return isspace(c)?SQTrue:SQFalse;\r
344         case 'S': return !isspace(c)?SQTrue:SQFalse;\r
345         case 'd': return isdigit(c)?SQTrue:SQFalse;\r
346         case 'D': return !isdigit(c)?SQTrue:SQFalse;\r
347         case 'x': return isxdigit(c)?SQTrue:SQFalse;\r
348         case 'X': return !isxdigit(c)?SQTrue:SQFalse;\r
349         case 'c': return iscntrl(c)?SQTrue:SQFalse;\r
350         case 'C': return !iscntrl(c)?SQTrue:SQFalse;\r
351         case 'p': return ispunct(c)?SQTrue:SQFalse;\r
352         case 'P': return !ispunct(c)?SQTrue:SQFalse;\r
353         case 'l': return islower(c)?SQTrue:SQFalse;\r
354         case 'u': return isupper(c)?SQTrue:SQFalse;\r
355         }\r
356         return SQFalse; /*cannot happen*/\r
357 }\r
358 \r
359 static SQBool sqstd_rex_matchclass(SQRex* exp,SQRexNode *node,SQChar c)\r
360 {\r
361         do {\r
362                 switch(node->type) {\r
363                         case OP_RANGE:\r
364                                 if(c >= node->left && c <= node->right) return SQTrue;\r
365                                 break;\r
366                         case OP_CCLASS:\r
367                                 if(sqstd_rex_matchcclass(node->left,c)) return SQTrue;\r
368                                 break;\r
369                         default:\r
370                                 if(c == node->type)return SQTrue;\r
371                 }\r
372         } while((node->next != -1) && (node = &exp->_nodes[node->next]));\r
373         return SQFalse;\r
374 }\r
375 \r
376 static const SQChar *sqstd_rex_matchnode(SQRex* exp,SQRexNode *node,const SQChar *str,SQRexNode *next)\r
377 {\r
378         \r
379         SQRexNodeType type = node->type;\r
380         switch(type) {\r
381         case OP_GREEDY: {\r
382                 //SQRexNode *greedystop = (node->next != -1) ? &exp->_nodes[node->next] : NULL;\r
383                 SQRexNode *greedystop = NULL;\r
384                 SQInteger p0 = (node->right >> 16)&0x0000FFFF, p1 = node->right&0x0000FFFF, nmaches = 0;\r
385                 const SQChar *s=str, *good = str;\r
386 \r
387                 if(node->next != -1) {\r
388                         greedystop = &exp->_nodes[node->next];\r
389                 }\r
390                 else {\r
391                         greedystop = next;\r
392                 }\r
393 \r
394                 while((nmaches == 0xFFFF || nmaches < p1)) {\r
395 \r
396                         const SQChar *stop;\r
397                         if(!(s = sqstd_rex_matchnode(exp,&exp->_nodes[node->left],s,greedystop)))\r
398                                 break;\r
399                         nmaches++;\r
400                         good=s;\r
401                         if(greedystop) {\r
402                                 //checks that 0 matches satisfy the expression(if so skips)\r
403                                 //if not would always stop(for instance if is a '?')\r
404                                 if(greedystop->type != OP_GREEDY ||\r
405                                 (greedystop->type == OP_GREEDY && ((greedystop->right >> 16)&0x0000FFFF) != 0))\r
406                                 {\r
407                                         SQRexNode *gnext = NULL;\r
408                                         if(greedystop->next != -1) {\r
409                                                 gnext = &exp->_nodes[greedystop->next];\r
410                                         }else if(next && next->next != -1){\r
411                                                 gnext = &exp->_nodes[next->next];\r
412                                         }\r
413                                         stop = sqstd_rex_matchnode(exp,greedystop,s,gnext);\r
414                                         if(stop) {\r
415                                                 //if satisfied stop it\r
416                                                 if(p0 == p1 && p0 == nmaches) break;\r
417                                                 else if(nmaches >= p0 && p1 == 0xFFFF) break;\r
418                                                 else if(nmaches >= p0 && nmaches <= p1) break;\r
419                                         }\r
420                                 }\r
421                         }\r
422                         \r
423                         if(s >= exp->_eol)\r
424                                 break;\r
425                 }\r
426                 if(p0 == p1 && p0 == nmaches) return good;\r
427                 else if(nmaches >= p0 && p1 == 0xFFFF) return good;\r
428                 else if(nmaches >= p0 && nmaches <= p1) return good;\r
429                 return NULL;\r
430         }\r
431         case OP_OR: {\r
432                         const SQChar *asd = str;\r
433                         SQRexNode *temp=&exp->_nodes[node->left];\r
434                         while( (asd = sqstd_rex_matchnode(exp,temp,asd,NULL)) ) {\r
435                                 if(temp->next != -1)\r
436                                         temp = &exp->_nodes[temp->next];\r
437                                 else\r
438                                         return asd;\r
439                         }\r
440                         asd = str;\r
441                         temp = &exp->_nodes[node->right];\r
442                         while( (asd = sqstd_rex_matchnode(exp,temp,asd,NULL)) ) {\r
443                                 if(temp->next != -1)\r
444                                         temp = &exp->_nodes[temp->next];\r
445                                 else\r
446                                         return asd;\r
447                         }\r
448                         return NULL;\r
449                         break;\r
450         }\r
451         case OP_EXPR:\r
452         case OP_NOCAPEXPR:{\r
453                         SQRexNode *n = &exp->_nodes[node->left];\r
454                         const SQChar *cur = str;\r
455                         SQInteger capture = -1;\r
456                         if(node->type != OP_NOCAPEXPR && node->right == exp->_currsubexp) {\r
457                                 capture = exp->_currsubexp;\r
458                                 exp->_matches[capture].begin = cur;\r
459                                 exp->_currsubexp++;\r
460                         }\r
461                         \r
462                         do {\r
463                                 SQRexNode *subnext = NULL;\r
464                                 if(n->next != -1) {\r
465                                         subnext = &exp->_nodes[n->next];\r
466                                 }else {\r
467                                         subnext = next;\r
468                                 }\r
469                                 if(!(cur = sqstd_rex_matchnode(exp,n,cur,subnext))) {\r
470                                         if(capture != -1){\r
471                                                 exp->_matches[capture].begin = 0;\r
472                                                 exp->_matches[capture].len = 0;\r
473                                         }\r
474                                         return NULL;\r
475                                 }\r
476                         } while((n->next != -1) && (n = &exp->_nodes[n->next]));\r
477 \r
478                         if(capture != -1) \r
479                                 exp->_matches[capture].len = cur - exp->_matches[capture].begin;\r
480                         return cur;\r
481         }                                \r
482         case OP_WB:\r
483                 if(str == exp->_bol && !isspace(*str)\r
484                  || (str == exp->_eol && !isspace(*(str-1)))\r
485                  || (!isspace(*str) && isspace(*(str+1)))\r
486                  || (isspace(*str) && !isspace(*(str+1))) ) {\r
487                         return (node->left == 'b')?str:NULL;\r
488                 }\r
489                 return (node->left == 'b')?NULL:str;\r
490         case OP_BOL:\r
491                 if(str == exp->_bol) return str;\r
492                 return NULL;\r
493         case OP_EOL:\r
494                 if(str == exp->_eol) return str;\r
495                 return NULL;\r
496         case OP_DOT:{\r
497                 *str++;\r
498                                 }\r
499                 return str;\r
500         case OP_NCLASS:\r
501         case OP_CLASS:\r
502                 if(sqstd_rex_matchclass(exp,&exp->_nodes[node->left],*str)?(type == OP_CLASS?SQTrue:SQFalse):(type == OP_NCLASS?SQTrue:SQFalse)) {\r
503                         *str++;\r
504                         return str;\r
505                 }\r
506                 return NULL;\r
507         case OP_CCLASS:\r
508                 if(sqstd_rex_matchcclass(node->left,*str)) {\r
509                         *str++;\r
510                         return str;\r
511                 }\r
512                 return NULL;\r
513         default: /* char */\r
514                 if(*str != node->type) return NULL;\r
515                 *str++;\r
516                 return str;\r
517         }\r
518         return NULL;\r
519 }\r
520 \r
521 /* public api */\r
522 SQRex *sqstd_rex_compile(const SQChar *pattern,const SQChar **error)\r
523 {\r
524         SQRex *exp = (SQRex *)sq_malloc(sizeof(SQRex));\r
525         exp->_eol = exp->_bol = NULL;\r
526         exp->_p = pattern;\r
527         exp->_nallocated = (SQInteger)scstrlen(pattern) * sizeof(SQChar);\r
528         exp->_nodes = (SQRexNode *)sq_malloc(exp->_nallocated * sizeof(SQRexNode));\r
529         exp->_nsize = 0;\r
530         exp->_matches = 0;\r
531         exp->_nsubexpr = 0;\r
532         exp->_first = sqstd_rex_newnode(exp,OP_EXPR);\r
533         exp->_error = error;\r
534         exp->_jmpbuf = sq_malloc(sizeof(jmp_buf));\r
535         if(setjmp(*((jmp_buf*)exp->_jmpbuf)) == 0) {\r
536                 exp->_nodes[exp->_first].left=sqstd_rex_list(exp);\r
537                 if(*exp->_p!='\0')\r
538                         sqstd_rex_error(exp,_SC("unexpected character"));\r
539 #ifdef _DEBUG\r
540                 {\r
541                         SQInteger nsize,i;\r
542                         SQRexNode *t;\r
543                         nsize = exp->_nsize;\r
544                         t = &exp->_nodes[0];\r
545                         scprintf(_SC("\n"));\r
546                         for(i = 0;i < nsize; i++) {\r
547                                 if(exp->_nodes[i].type>MAX_CHAR)\r
548                                         scprintf(_SC("[%02d] %10s "),i,g_nnames[exp->_nodes[i].type-MAX_CHAR]);\r
549                                 else\r
550                                         scprintf(_SC("[%02d] %10c "),i,exp->_nodes[i].type);\r
551                                 scprintf(_SC("left %02d right %02d next %02d\n"),exp->_nodes[i].left,exp->_nodes[i].right,exp->_nodes[i].next);\r
552                         }\r
553                         scprintf(_SC("\n"));\r
554                 }\r
555 #endif\r
556                 exp->_matches = (SQRexMatch *) sq_malloc(exp->_nsubexpr * sizeof(SQRexMatch));\r
557                 memset(exp->_matches,0,exp->_nsubexpr * sizeof(SQRexMatch));\r
558         }\r
559         else{\r
560                 sqstd_rex_free(exp);\r
561                 return NULL;\r
562         }\r
563         return exp;\r
564 }\r
565 \r
566 void sqstd_rex_free(SQRex *exp)\r
567 {\r
568         if(exp) {\r
569                 if(exp->_nodes) sq_free(exp->_nodes,exp->_nallocated * sizeof(SQRexNode));\r
570                 if(exp->_jmpbuf) sq_free(exp->_jmpbuf,sizeof(jmp_buf));\r
571                 if(exp->_matches) sq_free(exp->_matches,exp->_nsubexpr * sizeof(SQRexMatch));\r
572                 sq_free(exp,sizeof(SQRex));\r
573         }\r
574 }\r
575 \r
576 SQBool sqstd_rex_match(SQRex* exp,const SQChar* text)\r
577 {\r
578         const SQChar* res = NULL;\r
579         exp->_bol = text;\r
580         exp->_eol = text + scstrlen(text);\r
581         exp->_currsubexp = 0;\r
582         res = sqstd_rex_matchnode(exp,exp->_nodes,text,NULL);\r
583         if(res == NULL || res != exp->_eol)\r
584                 return SQFalse;\r
585         return SQTrue;\r
586 }\r
587 \r
588 SQBool sqstd_rex_searchrange(SQRex* exp,const SQChar* text_begin,const SQChar* text_end,const SQChar** out_begin, const SQChar** out_end)\r
589 {\r
590         const SQChar *cur = NULL;\r
591         SQInteger node = exp->_first;\r
592         if(text_begin >= text_end) return SQFalse;\r
593         exp->_bol = text_begin;\r
594         exp->_eol = text_end;\r
595         do {\r
596                 cur = text_begin;\r
597                 while(node != -1) {\r
598                         exp->_currsubexp = 0;\r
599                         cur = sqstd_rex_matchnode(exp,&exp->_nodes[node],cur,NULL);\r
600                         if(!cur)\r
601                                 break;\r
602                         node = exp->_nodes[node].next;\r
603                 }\r
604                 *text_begin++;\r
605         } while(cur == NULL && text_begin != text_end);\r
606 \r
607         if(cur == NULL)\r
608                 return SQFalse;\r
609 \r
610         --text_begin;\r
611 \r
612         if(out_begin) *out_begin = text_begin;\r
613         if(out_end) *out_end = cur;\r
614         return SQTrue;\r
615 }\r
616 \r
617 SQBool sqstd_rex_search(SQRex* exp,const SQChar* text, const SQChar** out_begin, const SQChar** out_end)\r
618 {\r
619         return sqstd_rex_searchrange(exp,text,text + scstrlen(text),out_begin,out_end);\r
620 }\r
621 \r
622 SQInteger sqstd_rex_getsubexpcount(SQRex* exp)\r
623 {\r
624         return exp->_nsubexpr;\r
625 }\r
626 \r
627 SQBool sqstd_rex_getsubexp(SQRex* exp, SQInteger n, SQRexMatch *subexp)\r
628 {\r
629         if( n<0 || n >= exp->_nsubexpr) return SQFalse;\r
630         *subexp = exp->_matches[n];\r
631         return SQTrue;\r
632 }\r
633 \r