-/* see copyright notice in squirrel.h */
-#include <squirrel.h>
-#include <string.h>
-#include <ctype.h>
-#include <setjmp.h>
-#include "sqstdstring.h"
-
-#ifdef _DEBUG
-#include <stdio.h>
-
-static const SQChar *g_nnames[] =
-{
- _SC("NONE"),_SC("OP_GREEDY"), _SC("OP_OR"),
- _SC("OP_EXPR"),_SC("OP_NOCAPEXPR"),_SC("OP_DOT"), _SC("OP_CLASS"),
- _SC("OP_CCLASS"),_SC("OP_NCLASS"),_SC("OP_RANGE"),_SC("OP_CHAR"),
- _SC("OP_EOL"),_SC("OP_BOL"),_SC("OP_WB")
-};
-
-#endif
-
-#define OP_GREEDY MAX_CHAR+1 // * + ? {n}
-#define OP_OR MAX_CHAR+2
-#define OP_EXPR MAX_CHAR+3 //parentesis ()
-#define OP_NOCAPEXPR MAX_CHAR+4 //parentesis (?:)
-#define OP_DOT MAX_CHAR+5
-#define OP_CLASS MAX_CHAR+6
-#define OP_CCLASS MAX_CHAR+7
-#define OP_NCLASS MAX_CHAR+8 //negates class the [^
-#define OP_RANGE MAX_CHAR+9
-#define OP_CHAR MAX_CHAR+10
-#define OP_EOL MAX_CHAR+11
-#define OP_BOL MAX_CHAR+12
-#define OP_WB MAX_CHAR+13
-
-#define SQREX_SYMBOL_ANY_CHAR '.'
-#define SQREX_SYMBOL_GREEDY_ONE_OR_MORE '+'
-#define SQREX_SYMBOL_GREEDY_ZERO_OR_MORE '*'
-#define SQREX_SYMBOL_GREEDY_ZERO_OR_ONE '?'
-#define SQREX_SYMBOL_BRANCH '|'
-#define SQREX_SYMBOL_END_OF_STRING '$'
-#define SQREX_SYMBOL_BEGINNING_OF_STRING '^'
-#define SQREX_SYMBOL_ESCAPE_CHAR '\\'
-
-
-typedef int SQRexNodeType;
-
-typedef struct tagSQRexNode{
- SQRexNodeType type;
- long left;
- long right;
- int next;
-}SQRexNode;
-
-struct SQRex{
- const SQChar *_eol;
- const SQChar *_bol;
- const SQChar *_p;
- int _first;
- int _op;
- SQRexNode *_nodes;
- int _nallocated;
- int _nsize;
- int _nsubexpr;
- SQRexMatch *_matches;
- int _currsubexp;
- void *_jmpbuf;
- const SQChar **_error;
-};
-
-static int sqstd_rex_list(SQRex *exp);
-
-static int sqstd_rex_newnode(SQRex *exp, SQRexNodeType type)
-{
- SQRexNode n;
- n.type = type;
- n.next = n.right = n.left = -1;
- if(type == OP_EXPR)
- n.right = exp->_nsubexpr++;
- if(exp->_nallocated < (exp->_nsize + 1)) {
- int oldsize = exp->_nallocated;
- exp->_nallocated *= 2;
- exp->_nodes = (SQRexNode *)sq_realloc(exp->_nodes, oldsize * sizeof(SQRexNode) ,exp->_nallocated * sizeof(SQRexNode));
- }
- exp->_nodes[exp->_nsize++] = n;
- return (int)exp->_nsize - 1;
-}
-
-static void sqstd_rex_error(SQRex *exp,const SQChar *error)
-{
- if(exp->_error) *exp->_error = error;
- longjmp(*((jmp_buf*)exp->_jmpbuf),-1);
-}
-
-static void sqstd_rex_expect(SQRex *exp, int n){
- if((*exp->_p) != n)
- sqstd_rex_error(exp, _SC("expected paren"));
- exp->_p++;
-}
-
-static SQBool sqstd_rex_ischar(SQChar c)
-{
- switch(c) {
- case SQREX_SYMBOL_BRANCH:case SQREX_SYMBOL_GREEDY_ZERO_OR_MORE:
- case SQREX_SYMBOL_GREEDY_ZERO_OR_ONE:case SQREX_SYMBOL_GREEDY_ONE_OR_MORE:
- case SQREX_SYMBOL_BEGINNING_OF_STRING:case SQREX_SYMBOL_END_OF_STRING:
- case SQREX_SYMBOL_ANY_CHAR:case SQREX_SYMBOL_ESCAPE_CHAR:case '(':case ')':case '[':case '{': case '}':
- return SQFalse;
- }
- return SQTrue;
-}
-
-static SQChar sqstd_rex_escapechar(SQRex *exp)
-{
- if(*exp->_p == SQREX_SYMBOL_ESCAPE_CHAR){
- exp->_p++;
- switch(*exp->_p) {
- case 'v': exp->_p++; return '\v';
- case 'n': exp->_p++; return '\n';
- case 't': exp->_p++; return '\t';
- case 'r': exp->_p++; return '\r';
- case 'f': exp->_p++; return '\f';
- default: return (*exp->_p++);
- }
- } else if(!sqstd_rex_ischar(*exp->_p)) sqstd_rex_error(exp,_SC("letter expected"));
- return (*exp->_p++);
-}
-
-static int sqstd_rex_charclass(SQRex *exp,int classid)
-{
- int n = sqstd_rex_newnode(exp,OP_CCLASS);
- exp->_nodes[n].left = classid;
- return n;
-}
-
-static int sqstd_rex_charnode(SQRex *exp,SQBool isclass)
-{
- if(*exp->_p == SQREX_SYMBOL_ESCAPE_CHAR) {
- exp->_p++;
- switch(*exp->_p) {
- case 'n': exp->_p++; return sqstd_rex_newnode(exp,'\n');
- case 't': exp->_p++; return sqstd_rex_newnode(exp,'\t');
- case 'r': exp->_p++; return sqstd_rex_newnode(exp,'\r');
- case 'f': exp->_p++; return sqstd_rex_newnode(exp,'\f');
- case 'v': exp->_p++; return sqstd_rex_newnode(exp,'\v');
- case 'a': case 'A': case 'w': case 'W': case 's': case 'S':
- case 'd': case 'D': case 'x': case 'X': case 'c': case 'C':
- case 'p': case 'P': case 'l': case 'u':
- {
- SQChar t = *exp->_p;
- exp->_p++;
- return sqstd_rex_charclass(exp,t);
- }
- case 'b':
- case 'B':
- if(!isclass) {
- int node = sqstd_rex_newnode(exp,OP_WB);
- exp->_nodes[node].left = *exp->_p;
- exp->_p++;
- return node;
- } //else default
- default: return sqstd_rex_newnode(exp,(*exp->_p++));
- }
- }
- else if(!sqstd_rex_ischar(*exp->_p)) {
-
- sqstd_rex_error(exp,_SC("letter expected"));
- }
- return sqstd_rex_newnode(exp,*exp->_p++);
-}
-static int sqstd_rex_class(SQRex *exp)
-{
- int ret = -1;
- int first = -1,chain;
- if(*exp->_p == SQREX_SYMBOL_BEGINNING_OF_STRING){
- ret = sqstd_rex_newnode(exp,OP_NCLASS);
- exp->_p++;
- }else ret = sqstd_rex_newnode(exp,OP_CLASS);
-
- if(*exp->_p == ']' || *exp->_p == '-'){
- first = *exp->_p;
- exp->_p++;
- }
- chain = ret;
- while(*exp->_p != ']' && exp->_p != exp->_eol) {
- if(*exp->_p == '-' && first != -1){
- int r;
- if(*exp->_p++ == ']') sqstd_rex_error(exp,_SC("unfinished range"));
- r = sqstd_rex_newnode(exp,OP_RANGE);
- if(first>*exp->_p) sqstd_rex_error(exp,_SC("invalid range"));
- if(exp->_nodes[first].type == OP_CCLASS) sqstd_rex_error(exp,_SC("cannot use character classes in ranges"));
- exp->_nodes[r].left = exp->_nodes[first].type;
- exp->_nodes[r].right = sqstd_rex_escapechar(exp);
- exp->_nodes[chain].next = r;
- chain = r;
- first = -1;
- }
- else{
- if(first!=-1){
- int c = first;
- exp->_nodes[chain].next = c;
- chain = c;
- first = sqstd_rex_charnode(exp,SQTrue);
- }
- else{
- first = sqstd_rex_charnode(exp,SQTrue);
- }
- }
- }
- if(first!=-1){
- int c = first;
- exp->_nodes[chain].next = c;
- chain = c;
- first = -1;
- }
- /* hack? */
- exp->_nodes[ret].left = exp->_nodes[ret].next;
- exp->_nodes[ret].next = -1;
- return ret;
-}
-
-static int sqstd_rex_parsenumber(SQRex *exp)
-{
- int ret = *exp->_p-'0';
- int positions = 10;
- exp->_p++;
- while(isdigit(*exp->_p)) {
- ret = ret*10+(*exp->_p++-'0');
- if(positions==1000000000) sqstd_rex_error(exp,_SC("overflow in numeric constant"));
- positions *= 10;
- };
- return ret;
-}
-
-static int sqstd_rex_element(SQRex *exp)
-{
- int ret;
- switch(*exp->_p)
- {
- case '(': {
- int expr;
- exp->_p++;
-
-
- if(*exp->_p =='?') {
- exp->_p++;
- sqstd_rex_expect(exp,':');
- expr = sqstd_rex_newnode(exp,OP_NOCAPEXPR);
- }
- else
- expr = sqstd_rex_newnode(exp,OP_EXPR);
- exp->_nodes[expr].left = sqstd_rex_list(exp);
- ret = expr;
- sqstd_rex_expect(exp,')');
- }
- break;
- case '[':
- exp->_p++;
- ret = sqstd_rex_class(exp);
- sqstd_rex_expect(exp,']');
- break;
- case SQREX_SYMBOL_END_OF_STRING: exp->_p++; ret = sqstd_rex_newnode(exp,OP_EOL);break;
- case SQREX_SYMBOL_ANY_CHAR: exp->_p++; ret = sqstd_rex_newnode(exp,OP_DOT);break;
- default:
- ret = sqstd_rex_charnode(exp,SQFalse);
- break;
- }
- /* scope block */
- {
- int op;
- unsigned short p0 = 0, p1 = 0;
- switch(*exp->_p){
- case SQREX_SYMBOL_GREEDY_ZERO_OR_MORE: p0 = 0; p1 = 0xFFFF; exp->_p++; goto __end;
- case SQREX_SYMBOL_GREEDY_ONE_OR_MORE: p0 = 1; p1 = 0xFFFF; exp->_p++; goto __end;
- case SQREX_SYMBOL_GREEDY_ZERO_OR_ONE: p0 = 0; p1 = 1; exp->_p++; goto __end;
- case '{':{
- exp->_p++;
- if(!isdigit(*exp->_p)) sqstd_rex_error(exp,_SC("number expected"));
- p0 = sqstd_rex_parsenumber(exp);
- switch(*exp->_p) {
- case '}':
- p1 = p0; exp->_p++;
- goto __end;
- case ',':
- exp->_p++;
- p1 = 0xFFFF;
- if(isdigit(*exp->_p)){
- p1 = sqstd_rex_parsenumber(exp);
- }
- sqstd_rex_expect(exp,'}');
- goto __end;
- default:
- sqstd_rex_error(exp,_SC(", or } expected"));
- }
- }
- __end: {
- int nnode = sqstd_rex_newnode(exp,OP_GREEDY);
- op = OP_GREEDY;
- exp->_nodes[nnode].left = ret;
- exp->_nodes[nnode].right = ((p0)<<16)|p1;
- ret = nnode;
- }
- }
- }
- if(*exp->_p != SQREX_SYMBOL_BRANCH && *exp->_p != ')' && *exp->_p != SQREX_SYMBOL_GREEDY_ZERO_OR_MORE && *exp->_p != SQREX_SYMBOL_GREEDY_ONE_OR_MORE && *exp->_p != '\0')
- exp->_nodes[ret].next = sqstd_rex_element(exp);
- return ret;
-}
-
-static int sqstd_rex_list(SQRex *exp)
-{
- int ret=-1,e;
- if(*exp->_p == SQREX_SYMBOL_BEGINNING_OF_STRING) {
- exp->_p++;
- ret = sqstd_rex_newnode(exp,OP_BOL);
- }
- e = sqstd_rex_element(exp);
- if(ret != -1) {
- exp->_nodes[ret].next = e;
- }
- else ret = e;
-
- if(*exp->_p == SQREX_SYMBOL_BRANCH) {
- int temp;
- exp->_p++;
- temp = sqstd_rex_newnode(exp,OP_OR);
- exp->_nodes[temp].left = ret;
- exp->_nodes[temp].right = sqstd_rex_list(exp);
- ret = temp;
- }
- return ret;
-}
-
-static SQBool sqstd_rex_matchcclass(int cclass,SQChar c)
-{
- switch(cclass) {
- case 'a': return isalpha(c)?SQTrue:SQFalse;
- case 'A': return !isalpha(c)?SQTrue:SQFalse;
- case 'w': return (isalnum(c) || c == '_')?SQTrue:SQFalse;
- case 'W': return (!isalnum(c) && c != '_')?SQTrue:SQFalse;
- case 's': return isspace(c)?SQTrue:SQFalse;
- case 'S': return !isspace(c)?SQTrue:SQFalse;
- case 'd': return isdigit(c)?SQTrue:SQFalse;
- case 'D': return !isdigit(c)?SQTrue:SQFalse;
- case 'x': return isxdigit(c)?SQTrue:SQFalse;
- case 'X': return !isxdigit(c)?SQTrue:SQFalse;
- case 'c': return iscntrl(c)?SQTrue:SQFalse;
- case 'C': return !iscntrl(c)?SQTrue:SQFalse;
- case 'p': return ispunct(c)?SQTrue:SQFalse;
- case 'P': return !ispunct(c)?SQTrue:SQFalse;
- case 'l': return islower(c)?SQTrue:SQFalse;
- case 'u': return isupper(c)?SQTrue:SQFalse;
- }
- return SQFalse; /*cannot happen*/
-}
-
-static SQBool sqstd_rex_matchclass(SQRex* exp,SQRexNode *node,SQChar c)
-{
- do {
- switch(node->type) {
- case OP_RANGE:
- if(c >= node->left && c <= node->right) return SQTrue;
- break;
- case OP_CCLASS:
- if(sqstd_rex_matchcclass(node->left,c)) return SQTrue;
- break;
- default:
- if(c == node->type)return SQTrue;
- }
- } while((node->next != -1) && (node = &exp->_nodes[node->next]));
- return SQFalse;
-}
-
-static const SQChar *sqstd_rex_matchnode(SQRex* exp,SQRexNode *node,const SQChar *str)
-{
- SQRexNodeType type = node->type;
- switch(type) {
- case OP_GREEDY: {
- int p0 = (node->right >> 16)&0x0000FFFF, p1 = node->right&0x0000FFFF, nmaches = 0;
- const SQChar *s=str, *good = str;
- while((nmaches == 0xFFFF || nmaches < p1)
- && (s = sqstd_rex_matchnode(exp,&exp->_nodes[node->left],s))) {
- good=s;
- nmaches++;
- if(s >= exp->_eol)
- break;
- }
- if(p0 == p1 && p0 == nmaches) return good;
- else if(nmaches >= p0 && p1 == 0xFFFF) return good;
- else if(nmaches >= p0 && nmaches <= p1) return good;
- return NULL;
- }
- case OP_OR: {
- const SQChar *asd = str;
- SQRexNode *temp=&exp->_nodes[node->left];
- while( (asd = sqstd_rex_matchnode(exp,temp,asd)) ) {
- if(temp->next != -1)
- temp = &exp->_nodes[temp->next];
- else
- return asd;
- }
- asd = str;
- temp = &exp->_nodes[node->right];
- while( (asd = sqstd_rex_matchnode(exp,temp,asd)) ) {
- if(temp->next != -1)
- temp = &exp->_nodes[temp->next];
- else
- return asd;
- }
- return NULL;
- break;
- }
- case OP_EXPR:
- case OP_NOCAPEXPR:{
- SQRexNode *n = &exp->_nodes[node->left];
- const SQChar *cur = str;
- int capture = -1;
- if(node->type != OP_NOCAPEXPR && node->right == exp->_currsubexp) {
- capture = exp->_currsubexp;
- exp->_matches[capture].begin = cur;
- exp->_currsubexp++;
- }
-
- do {
- if(!(cur = sqstd_rex_matchnode(exp,n,cur))) {
- if(capture != -1){
- exp->_matches[capture].begin = 0;
- exp->_matches[capture].len = 0;
- }
- return NULL;
- }
- } while((n->next != -1) && (n = &exp->_nodes[n->next]));
-
- if(capture != -1)
- exp->_matches[capture].len = cur - exp->_matches[capture].begin;
- return cur;
- }
- case OP_WB:
- if((str == exp->_bol && !isspace(*str))
- || (str == exp->_eol && !isspace(*(str-1)))
- || ((!isspace(*str) && isspace(*(str+1))))
- || ((isspace(*str) && !isspace(*(str+1)))) ) {
- return (node->left == 'b')?str:NULL;
- }
- return (node->left == 'b')?NULL:str;
- case OP_BOL:
- if(str == exp->_bol) return str;
- return NULL;
- case OP_EOL:
- if(str == exp->_eol) return str;
- return NULL;
- case OP_DOT:
- *str++;
- return str;
- case OP_NCLASS:
- case OP_CLASS:
- if(sqstd_rex_matchclass(exp,&exp->_nodes[node->left],*str)?(type == OP_CLASS?SQTrue:SQFalse):(type == OP_NCLASS?SQTrue:SQFalse)) {
- *str++;
- return str;
- }
- return NULL;
- case OP_CCLASS:
- if(sqstd_rex_matchcclass(node->left,*str)) {
- *str++;
- return str;
- }
- return NULL;
- default: /* char */
- if(*str != node->type) return NULL;
- *str++;
- return str;
- }
- return NULL;
-}
-
-/* public api */
-SQRex *sqstd_rex_compile(const SQChar *pattern,const SQChar **error)
-{
- SQRex *exp = (SQRex *)sq_malloc(sizeof(SQRex));
- exp->_p = pattern;
- exp->_nallocated = (int)scstrlen(pattern) * sizeof(SQChar);
- exp->_nodes = (SQRexNode *)sq_malloc(exp->_nallocated * sizeof(SQRexNode));
- exp->_nsize = 0;
- exp->_matches = 0;
- exp->_nsubexpr = 0;
- exp->_first = sqstd_rex_newnode(exp,OP_EXPR);
- exp->_error = error;
- exp->_jmpbuf = sq_malloc(sizeof(jmp_buf));
- if(setjmp(*((jmp_buf*)exp->_jmpbuf)) == 0) {
- exp->_nodes[exp->_first].left=sqstd_rex_list(exp);
- if(*exp->_p!='\0')
- sqstd_rex_error(exp,_SC("unexpected character"));
-#ifdef _DEBUG
- {
- int nsize,i;
- SQRexNode *t;
- nsize = exp->_nsize;
- t = &exp->_nodes[0];
- scprintf(_SC("\n"));
- for(i = 0;i < nsize; i++) {
- if(exp->_nodes[i].type>MAX_CHAR)
- scprintf(_SC("[%02d] %10s "),i,g_nnames[exp->_nodes[i].type-MAX_CHAR]);
- else
- scprintf(_SC("[%02d] %10c "),i,exp->_nodes[i].type);
- scprintf(_SC("left %02d right %02d next %02d\n"),exp->_nodes[i].left,exp->_nodes[i].right,exp->_nodes[i].next);
- }
- scprintf(_SC("\n"));
- }
-#endif
- exp->_matches = (SQRexMatch *) sq_malloc(exp->_nsubexpr * sizeof(SQRexMatch));
- memset(exp->_matches,0,exp->_nsubexpr * sizeof(SQRexMatch));
- }
- else{
- sqstd_rex_free(exp);
- return NULL;
- }
- return exp;
-}
-
-void sqstd_rex_free(SQRex *exp)
-{
- if(exp) {
- if(exp->_nodes) sq_free(exp->_nodes,exp->_nallocated * sizeof(SQRexNode));
- if(exp->_jmpbuf) sq_free(exp->_jmpbuf,sizeof(jmp_buf));
- if(exp->_matches) sq_free(exp->_matches,exp->_nsubexpr * sizeof(SQRexMatch));
- sq_free(exp,sizeof(SQRex));
- }
-}
-
-SQBool sqstd_rex_match(SQRex* exp,const SQChar* text)
-{
- const SQChar* res = NULL;
- exp->_bol = text;
- exp->_eol = text + scstrlen(text);
- exp->_currsubexp = 0;
- res = sqstd_rex_matchnode(exp,exp->_nodes,text);
- if(res == NULL || res != exp->_eol)
- return SQFalse;
- return SQTrue;
-}
-
-SQBool sqstd_rex_searchrange(SQRex* exp,const SQChar* text_begin,const SQChar* text_end,const SQChar** out_begin, const SQChar** out_end)
-{
- const SQChar *cur = NULL;
- int node = exp->_first;
- if(text_begin >= text_end) return SQFalse;
- exp->_bol = text_begin;
- exp->_eol = text_end;
- do {
- cur = text_begin;
- while(node != -1) {
- exp->_currsubexp = 0;
- cur = sqstd_rex_matchnode(exp,&exp->_nodes[node],cur);
- if(!cur)
- break;
- node = exp->_nodes[node].next;
- }
- *text_begin++;
- } while(cur == NULL && text_begin != text_end);
-
- if(cur == NULL)
- return SQFalse;
-
- --text_begin;
-
- if(out_begin) *out_begin = text_begin;
- if(out_end) *out_end = cur;
- return SQTrue;
-}
-
-SQBool sqstd_rex_search(SQRex* exp,const SQChar* text, const SQChar** out_begin, const SQChar** out_end)
-{
- return sqstd_rex_searchrange(exp,text,text + scstrlen(text),out_begin,out_end);
-}
-
-int sqstd_rex_getsubexpcount(SQRex* exp)
-{
- return exp->_nsubexpr;
-}
-
-SQBool sqstd_rex_getsubexp(SQRex* exp, int n, SQRexMatch *subexp)
-{
- if( n<0 || n >= exp->_nsubexpr) return SQFalse;
- *subexp = exp->_matches[n];
- return SQTrue;
-}
-
+/* see copyright notice in squirrel.h */\r
+#include <squirrel.h>\r
+#include <string.h>\r
+#include <ctype.h>\r
+#include <setjmp.h>\r
+#include "sqstdstring.h"\r
+\r
+#ifdef _DEBUG\r
+#include <stdio.h>\r
+\r
+static const SQChar *g_nnames[] =\r
+{\r
+ _SC("NONE"),_SC("OP_GREEDY"), _SC("OP_OR"),\r
+ _SC("OP_EXPR"),_SC("OP_NOCAPEXPR"),_SC("OP_DOT"), _SC("OP_CLASS"),\r
+ _SC("OP_CCLASS"),_SC("OP_NCLASS"),_SC("OP_RANGE"),_SC("OP_CHAR"),\r
+ _SC("OP_EOL"),_SC("OP_BOL"),_SC("OP_WB")\r
+};\r
+\r
+#endif\r
+\r
+#define OP_GREEDY MAX_CHAR+1 // * + ? {n}\r
+#define OP_OR MAX_CHAR+2\r
+#define OP_EXPR MAX_CHAR+3 //parentesis ()\r
+#define OP_NOCAPEXPR MAX_CHAR+4 //parentesis (?:)\r
+#define OP_DOT MAX_CHAR+5\r
+#define OP_CLASS MAX_CHAR+6\r
+#define OP_CCLASS MAX_CHAR+7\r
+#define OP_NCLASS MAX_CHAR+8 //negates class the [^\r
+#define OP_RANGE MAX_CHAR+9\r
+#define OP_CHAR MAX_CHAR+10\r
+#define OP_EOL MAX_CHAR+11\r
+#define OP_BOL MAX_CHAR+12\r
+#define OP_WB MAX_CHAR+13\r
+\r
+#define SQREX_SYMBOL_ANY_CHAR '.'\r
+#define SQREX_SYMBOL_GREEDY_ONE_OR_MORE '+'\r
+#define SQREX_SYMBOL_GREEDY_ZERO_OR_MORE '*'\r
+#define SQREX_SYMBOL_GREEDY_ZERO_OR_ONE '?'\r
+#define SQREX_SYMBOL_BRANCH '|'\r
+#define SQREX_SYMBOL_END_OF_STRING '$'\r
+#define SQREX_SYMBOL_BEGINNING_OF_STRING '^'\r
+#define SQREX_SYMBOL_ESCAPE_CHAR '\\'\r
+\r
+\r
+typedef int SQRexNodeType;\r
+\r
+typedef struct tagSQRexNode{\r
+ SQRexNodeType type;\r
+ long left;\r
+ long right;\r
+ int next;\r
+}SQRexNode;\r
+\r
+struct SQRex{\r
+ const SQChar *_eol;\r
+ const SQChar *_bol;\r
+ const SQChar *_p;\r
+ int _first;\r
+ int _op;\r
+ SQRexNode *_nodes;\r
+ int _nallocated;\r
+ int _nsize;\r
+ int _nsubexpr;\r
+ SQRexMatch *_matches;\r
+ int _currsubexp;\r
+ void *_jmpbuf;\r
+ const SQChar **_error;\r
+};\r
+\r
+static int sqstd_rex_list(SQRex *exp);\r
+\r
+static int sqstd_rex_newnode(SQRex *exp, SQRexNodeType type)\r
+{\r
+ SQRexNode n;\r
+ n.type = type;\r
+ n.next = n.right = n.left = -1;\r
+ if(type == OP_EXPR)\r
+ n.right = exp->_nsubexpr++;\r
+ if(exp->_nallocated < (exp->_nsize + 1)) {\r
+ int oldsize = exp->_nallocated;\r
+ exp->_nallocated *= 2;\r
+ exp->_nodes = (SQRexNode *)sq_realloc(exp->_nodes, oldsize * sizeof(SQRexNode) ,exp->_nallocated * sizeof(SQRexNode));\r
+ }\r
+ exp->_nodes[exp->_nsize++] = n;\r
+ return (int)exp->_nsize - 1;\r
+}\r
+\r
+static void sqstd_rex_error(SQRex *exp,const SQChar *error)\r
+{\r
+ if(exp->_error) *exp->_error = error;\r
+ longjmp(*((jmp_buf*)exp->_jmpbuf),-1);\r
+}\r
+\r
+static void sqstd_rex_expect(SQRex *exp, int n){\r
+ if((*exp->_p) != n) \r
+ sqstd_rex_error(exp, _SC("expected paren"));\r
+ exp->_p++;\r
+}\r
+\r
+static SQBool sqstd_rex_ischar(SQChar c)\r
+{\r
+ switch(c) {\r
+ case SQREX_SYMBOL_BRANCH:case SQREX_SYMBOL_GREEDY_ZERO_OR_MORE:\r
+ case SQREX_SYMBOL_GREEDY_ZERO_OR_ONE:case SQREX_SYMBOL_GREEDY_ONE_OR_MORE:\r
+ case SQREX_SYMBOL_BEGINNING_OF_STRING:case SQREX_SYMBOL_END_OF_STRING:\r
+ case SQREX_SYMBOL_ANY_CHAR:case SQREX_SYMBOL_ESCAPE_CHAR:case '(':case ')':case '[':case '{': case '}':\r
+ return SQFalse;\r
+ }\r
+ return SQTrue;\r
+}\r
+\r
+static SQChar sqstd_rex_escapechar(SQRex *exp)\r
+{\r
+ if(*exp->_p == SQREX_SYMBOL_ESCAPE_CHAR){\r
+ exp->_p++;\r
+ switch(*exp->_p) {\r
+ case 'v': exp->_p++; return '\v';\r
+ case 'n': exp->_p++; return '\n';\r
+ case 't': exp->_p++; return '\t';\r
+ case 'r': exp->_p++; return '\r';\r
+ case 'f': exp->_p++; return '\f';\r
+ default: return (*exp->_p++);\r
+ }\r
+ } else if(!sqstd_rex_ischar(*exp->_p)) sqstd_rex_error(exp,_SC("letter expected"));\r
+ return (*exp->_p++);\r
+}\r
+\r
+static int sqstd_rex_charclass(SQRex *exp,int classid)\r
+{\r
+ int n = sqstd_rex_newnode(exp,OP_CCLASS);\r
+ exp->_nodes[n].left = classid;\r
+ return n;\r
+}\r
+\r
+static int sqstd_rex_charnode(SQRex *exp,SQBool isclass)\r
+{\r
+ if(*exp->_p == SQREX_SYMBOL_ESCAPE_CHAR) {\r
+ exp->_p++;\r
+ switch(*exp->_p) {\r
+ case 'n': exp->_p++; return sqstd_rex_newnode(exp,'\n');\r
+ case 't': exp->_p++; return sqstd_rex_newnode(exp,'\t');\r
+ case 'r': exp->_p++; return sqstd_rex_newnode(exp,'\r');\r
+ case 'f': exp->_p++; return sqstd_rex_newnode(exp,'\f');\r
+ case 'v': exp->_p++; return sqstd_rex_newnode(exp,'\v');\r
+ case 'a': case 'A': case 'w': case 'W': case 's': case 'S': \r
+ case 'd': case 'D': case 'x': case 'X': case 'c': case 'C': \r
+ case 'p': case 'P': case 'l': case 'u': \r
+ {\r
+ SQChar t = *exp->_p;\r
+ exp->_p++; \r
+ return sqstd_rex_charclass(exp,t);\r
+ }\r
+ case 'b': \r
+ case 'B':\r
+ if(!isclass) {\r
+ int node = sqstd_rex_newnode(exp,OP_WB);\r
+ exp->_nodes[node].left = *exp->_p;\r
+ exp->_p++; \r
+ return node;\r
+ } //else default\r
+ default: return sqstd_rex_newnode(exp,(*exp->_p++));\r
+ }\r
+ }\r
+ else if(!sqstd_rex_ischar(*exp->_p)) {\r
+ \r
+ sqstd_rex_error(exp,_SC("letter expected"));\r
+ }\r
+ return sqstd_rex_newnode(exp,*exp->_p++);\r
+}\r
+static int sqstd_rex_class(SQRex *exp)\r
+{\r
+ int ret = -1;\r
+ int first = -1,chain;\r
+ if(*exp->_p == SQREX_SYMBOL_BEGINNING_OF_STRING){\r
+ ret = sqstd_rex_newnode(exp,OP_NCLASS);\r
+ exp->_p++;\r
+ }else ret = sqstd_rex_newnode(exp,OP_CLASS);\r
+ \r
+ if(*exp->_p == ']' || *exp->_p == '-'){\r
+ first = *exp->_p;\r
+ exp->_p++;\r
+ }\r
+ chain = ret;\r
+ while(*exp->_p != ']' && exp->_p != exp->_eol) {\r
+ if(*exp->_p == '-' && first != -1){ \r
+ int r;\r
+ if(*exp->_p++ == ']') sqstd_rex_error(exp,_SC("unfinished range"));\r
+ r = sqstd_rex_newnode(exp,OP_RANGE);\r
+ if(first>*exp->_p) sqstd_rex_error(exp,_SC("invalid range"));\r
+ if(exp->_nodes[first].type == OP_CCLASS) sqstd_rex_error(exp,_SC("cannot use character classes in ranges"));\r
+ exp->_nodes[r].left = exp->_nodes[first].type;\r
+ exp->_nodes[r].right = sqstd_rex_escapechar(exp);\r
+ exp->_nodes[chain].next = r;\r
+ chain = r;\r
+ first = -1;\r
+ }\r
+ else{\r
+ if(first!=-1){\r
+ int c = first;\r
+ exp->_nodes[chain].next = c;\r
+ chain = c;\r
+ first = sqstd_rex_charnode(exp,SQTrue);\r
+ }\r
+ else{\r
+ first = sqstd_rex_charnode(exp,SQTrue);\r
+ }\r
+ }\r
+ }\r
+ if(first!=-1){\r
+ int c = first;\r
+ exp->_nodes[chain].next = c;\r
+ chain = c;\r
+ first = -1;\r
+ }\r
+ /* hack? */\r
+ exp->_nodes[ret].left = exp->_nodes[ret].next;\r
+ exp->_nodes[ret].next = -1;\r
+ return ret;\r
+}\r
+\r
+static int sqstd_rex_parsenumber(SQRex *exp)\r
+{\r
+ int ret = *exp->_p-'0';\r
+ int positions = 10;\r
+ exp->_p++;\r
+ while(isdigit(*exp->_p)) {\r
+ ret = ret*10+(*exp->_p++-'0');\r
+ if(positions==1000000000) sqstd_rex_error(exp,_SC("overflow in numeric constant"));\r
+ positions *= 10;\r
+ };\r
+ return ret;\r
+}\r
+\r
+static int sqstd_rex_element(SQRex *exp)\r
+{\r
+ int ret;\r
+ switch(*exp->_p)\r
+ {\r
+ case '(': {\r
+ int expr;\r
+ exp->_p++;\r
+ \r
+ \r
+ if(*exp->_p =='?') {\r
+ exp->_p++;\r
+ sqstd_rex_expect(exp,':');\r
+ expr = sqstd_rex_newnode(exp,OP_NOCAPEXPR);\r
+ }\r
+ else\r
+ expr = sqstd_rex_newnode(exp,OP_EXPR);\r
+ exp->_nodes[expr].left = sqstd_rex_list(exp);\r
+ ret = expr;\r
+ sqstd_rex_expect(exp,')');\r
+ }\r
+ break;\r
+ case '[':\r
+ exp->_p++;\r
+ ret = sqstd_rex_class(exp);\r
+ sqstd_rex_expect(exp,']');\r
+ break;\r
+ case SQREX_SYMBOL_END_OF_STRING: exp->_p++; ret = sqstd_rex_newnode(exp,OP_EOL);break;\r
+ case SQREX_SYMBOL_ANY_CHAR: exp->_p++; ret = sqstd_rex_newnode(exp,OP_DOT);break;\r
+ default:\r
+ ret = sqstd_rex_charnode(exp,SQFalse);\r
+ break;\r
+ }\r
+ /* scope block */\r
+ {\r
+ int op;\r
+ unsigned short p0 = 0, p1 = 0;\r
+ switch(*exp->_p){\r
+ case SQREX_SYMBOL_GREEDY_ZERO_OR_MORE: p0 = 0; p1 = 0xFFFF; exp->_p++; goto __end;\r
+ case SQREX_SYMBOL_GREEDY_ONE_OR_MORE: p0 = 1; p1 = 0xFFFF; exp->_p++; goto __end;\r
+ case SQREX_SYMBOL_GREEDY_ZERO_OR_ONE: p0 = 0; p1 = 1; exp->_p++; goto __end;\r
+ case '{':{\r
+ exp->_p++;\r
+ if(!isdigit(*exp->_p)) sqstd_rex_error(exp,_SC("number expected"));\r
+ p0 = sqstd_rex_parsenumber(exp);\r
+ switch(*exp->_p) {\r
+ case '}':\r
+ p1 = p0; exp->_p++;\r
+ goto __end;\r
+ case ',':\r
+ exp->_p++;\r
+ p1 = 0xFFFF;\r
+ if(isdigit(*exp->_p)){\r
+ p1 = sqstd_rex_parsenumber(exp);\r
+ }\r
+ sqstd_rex_expect(exp,'}');\r
+ goto __end;\r
+ default:\r
+ sqstd_rex_error(exp,_SC(", or } expected"));\r
+ }\r
+ }\r
+ __end: {\r
+ int nnode = sqstd_rex_newnode(exp,OP_GREEDY);\r
+ op = OP_GREEDY;\r
+ exp->_nodes[nnode].left = ret;\r
+ exp->_nodes[nnode].right = ((p0)<<16)|p1;\r
+ ret = nnode;\r
+ }\r
+ }\r
+ }\r
+ if(*exp->_p != SQREX_SYMBOL_BRANCH && *exp->_p != ')' && *exp->_p != SQREX_SYMBOL_GREEDY_ZERO_OR_MORE && *exp->_p != SQREX_SYMBOL_GREEDY_ONE_OR_MORE && *exp->_p != '\0')\r
+ exp->_nodes[ret].next = sqstd_rex_element(exp);\r
+ return ret;\r
+}\r
+\r
+static int sqstd_rex_list(SQRex *exp)\r
+{\r
+ int ret=-1,e;\r
+ if(*exp->_p == SQREX_SYMBOL_BEGINNING_OF_STRING) {\r
+ exp->_p++;\r
+ ret = sqstd_rex_newnode(exp,OP_BOL);\r
+ }\r
+ e = sqstd_rex_element(exp);\r
+ if(ret != -1) {\r
+ exp->_nodes[ret].next = e;\r
+ }\r
+ else ret = e;\r
+\r
+ if(*exp->_p == SQREX_SYMBOL_BRANCH) {\r
+ int temp;\r
+ exp->_p++;\r
+ temp = sqstd_rex_newnode(exp,OP_OR);\r
+ exp->_nodes[temp].left = ret;\r
+ exp->_nodes[temp].right = sqstd_rex_list(exp);\r
+ ret = temp;\r
+ }\r
+ return ret;\r
+}\r
+\r
+static SQBool sqstd_rex_matchcclass(int cclass,SQChar c)\r
+{\r
+ switch(cclass) {\r
+ case 'a': return isalpha(c)?SQTrue:SQFalse;\r
+ case 'A': return !isalpha(c)?SQTrue:SQFalse;\r
+ case 'w': return (isalnum(c) || c == '_')?SQTrue:SQFalse;\r
+ case 'W': return (!isalnum(c) && c != '_')?SQTrue:SQFalse;\r
+ case 's': return isspace(c)?SQTrue:SQFalse;\r
+ case 'S': return !isspace(c)?SQTrue:SQFalse;\r
+ case 'd': return isdigit(c)?SQTrue:SQFalse;\r
+ case 'D': return !isdigit(c)?SQTrue:SQFalse;\r
+ case 'x': return isxdigit(c)?SQTrue:SQFalse;\r
+ case 'X': return !isxdigit(c)?SQTrue:SQFalse;\r
+ case 'c': return iscntrl(c)?SQTrue:SQFalse;\r
+ case 'C': return !iscntrl(c)?SQTrue:SQFalse;\r
+ case 'p': return ispunct(c)?SQTrue:SQFalse;\r
+ case 'P': return !ispunct(c)?SQTrue:SQFalse;\r
+ case 'l': return islower(c)?SQTrue:SQFalse;\r
+ case 'u': return isupper(c)?SQTrue:SQFalse;\r
+ }\r
+ return SQFalse; /*cannot happen*/\r
+}\r
+\r
+static SQBool sqstd_rex_matchclass(SQRex* exp,SQRexNode *node,SQChar c)\r
+{\r
+ do {\r
+ switch(node->type) {\r
+ case OP_RANGE:\r
+ if(c >= node->left && c <= node->right) return SQTrue;\r
+ break;\r
+ case OP_CCLASS:\r
+ if(sqstd_rex_matchcclass(node->left,c)) return SQTrue;\r
+ break;\r
+ default:\r
+ if(c == node->type)return SQTrue;\r
+ }\r
+ } while((node->next != -1) && (node = &exp->_nodes[node->next]));\r
+ return SQFalse;\r
+}\r
+\r
+static const SQChar *sqstd_rex_matchnode(SQRex* exp,SQRexNode *node,const SQChar *str)\r
+{\r
+ SQRexNodeType type = node->type;\r
+ switch(type) {\r
+ case OP_GREEDY: {\r
+ int p0 = (node->right >> 16)&0x0000FFFF, p1 = node->right&0x0000FFFF, nmaches = 0;\r
+ const SQChar *s=str, *good = str;\r
+ while((nmaches == 0xFFFF || nmaches < p1) \r
+ && (s = sqstd_rex_matchnode(exp,&exp->_nodes[node->left],s))) {\r
+ good=s;\r
+ nmaches++;\r
+ if(s >= exp->_eol)\r
+ break;\r
+ }\r
+ if(p0 == p1 && p0 == nmaches) return good;\r
+ else if(nmaches >= p0 && p1 == 0xFFFF) return good;\r
+ else if(nmaches >= p0 && nmaches <= p1) return good;\r
+ return NULL;\r
+ }\r
+ case OP_OR: {\r
+ const SQChar *asd = str;\r
+ SQRexNode *temp=&exp->_nodes[node->left];\r
+ while(asd = sqstd_rex_matchnode(exp,temp,asd)) {\r
+ if(temp->next != -1)\r
+ temp = &exp->_nodes[temp->next];\r
+ else\r
+ return asd;\r
+ }\r
+ asd = str;\r
+ temp = &exp->_nodes[node->right];\r
+ while(asd = sqstd_rex_matchnode(exp,temp,asd)) {\r
+ if(temp->next != -1)\r
+ temp = &exp->_nodes[temp->next];\r
+ else\r
+ return asd;\r
+ }\r
+ return NULL;\r
+ break;\r
+ }\r
+ case OP_EXPR:\r
+ case OP_NOCAPEXPR:{\r
+ SQRexNode *n = &exp->_nodes[node->left];\r
+ const SQChar *cur = str;\r
+ int capture = -1;\r
+ if(node->type != OP_NOCAPEXPR && node->right == exp->_currsubexp) {\r
+ capture = exp->_currsubexp;\r
+ exp->_matches[capture].begin = cur;\r
+ exp->_currsubexp++;\r
+ }\r
+\r
+ do {\r
+ if(!(cur = sqstd_rex_matchnode(exp,n,cur))) {\r
+ if(capture != -1){\r
+ exp->_matches[capture].begin = 0;\r
+ exp->_matches[capture].len = 0;\r
+ }\r
+ return NULL;\r
+ }\r
+ } while((n->next != -1) && (n = &exp->_nodes[n->next]));\r
+\r
+ if(capture != -1) \r
+ exp->_matches[capture].len = cur - exp->_matches[capture].begin;\r
+ return cur;\r
+ } \r
+ case OP_WB:\r
+ if(str == exp->_bol && !isspace(*str)\r
+ || (str == exp->_eol && !isspace(*(str-1)))\r
+ || (!isspace(*str) && isspace(*(str+1)))\r
+ || (isspace(*str) && !isspace(*(str+1))) ) {\r
+ return (node->left == 'b')?str:NULL;\r
+ }\r
+ return (node->left == 'b')?NULL:str;\r
+ case OP_BOL:\r
+ if(str == exp->_bol) return str;\r
+ return NULL;\r
+ case OP_EOL:\r
+ if(str == exp->_eol) return str;\r
+ return NULL;\r
+ case OP_DOT:\r
+ *str++;\r
+ return str;\r
+ case OP_NCLASS:\r
+ case OP_CLASS:\r
+ if(sqstd_rex_matchclass(exp,&exp->_nodes[node->left],*str)?(type == OP_CLASS?SQTrue:SQFalse):(type == OP_NCLASS?SQTrue:SQFalse)) {\r
+ *str++;\r
+ return str;\r
+ }\r
+ return NULL;\r
+ case OP_CCLASS:\r
+ if(sqstd_rex_matchcclass(node->left,*str)) {\r
+ *str++;\r
+ return str;\r
+ }\r
+ return NULL;\r
+ default: /* char */\r
+ if(*str != node->type) return NULL;\r
+ *str++;\r
+ return str;\r
+ }\r
+ return NULL;\r
+}\r
+\r
+/* public api */\r
+SQRex *sqstd_rex_compile(const SQChar *pattern,const SQChar **error)\r
+{\r
+ SQRex *exp = (SQRex *)sq_malloc(sizeof(SQRex));\r
+ exp->_p = pattern;\r
+ exp->_nallocated = (int)scstrlen(pattern) * sizeof(SQChar);\r
+ exp->_nodes = (SQRexNode *)sq_malloc(exp->_nallocated * sizeof(SQRexNode));\r
+ exp->_nsize = 0;\r
+ exp->_matches = 0;\r
+ exp->_nsubexpr = 0;\r
+ exp->_first = sqstd_rex_newnode(exp,OP_EXPR);\r
+ exp->_error = error;\r
+ exp->_jmpbuf = sq_malloc(sizeof(jmp_buf));\r
+ if(setjmp(*((jmp_buf*)exp->_jmpbuf)) == 0) {\r
+ exp->_nodes[exp->_first].left=sqstd_rex_list(exp);\r
+ if(*exp->_p!='\0')\r
+ sqstd_rex_error(exp,_SC("unexpected character"));\r
+#ifdef _DEBUG\r
+ {\r
+ int nsize,i;\r
+ SQRexNode *t;\r
+ nsize = exp->_nsize;\r
+ t = &exp->_nodes[0];\r
+ scprintf(_SC("\n"));\r
+ for(i = 0;i < nsize; i++) {\r
+ if(exp->_nodes[i].type>MAX_CHAR)\r
+ scprintf(_SC("[%02d] %10s "),i,g_nnames[exp->_nodes[i].type-MAX_CHAR]);\r
+ else\r
+ scprintf(_SC("[%02d] %10c "),i,exp->_nodes[i].type);\r
+ scprintf(_SC("left %02d right %02d next %02d\n"),exp->_nodes[i].left,exp->_nodes[i].right,exp->_nodes[i].next);\r
+ }\r
+ scprintf(_SC("\n"));\r
+ }\r
+#endif\r
+ exp->_matches = (SQRexMatch *) sq_malloc(exp->_nsubexpr * sizeof(SQRexMatch));\r
+ memset(exp->_matches,0,exp->_nsubexpr * sizeof(SQRexMatch));\r
+ }\r
+ else{\r
+ sqstd_rex_free(exp);\r
+ return NULL;\r
+ }\r
+ return exp;\r
+}\r
+\r
+void sqstd_rex_free(SQRex *exp)\r
+{\r
+ if(exp) {\r
+ if(exp->_nodes) sq_free(exp->_nodes,exp->_nallocated * sizeof(SQRexNode));\r
+ if(exp->_jmpbuf) sq_free(exp->_jmpbuf,sizeof(jmp_buf));\r
+ if(exp->_matches) sq_free(exp->_matches,exp->_nsubexpr * sizeof(SQRexMatch));\r
+ sq_free(exp,sizeof(SQRex));\r
+ }\r
+}\r
+\r
+SQBool sqstd_rex_match(SQRex* exp,const SQChar* text)\r
+{\r
+ const SQChar* res = NULL;\r
+ exp->_bol = text;\r
+ exp->_eol = text + scstrlen(text);\r
+ exp->_currsubexp = 0;\r
+ res = sqstd_rex_matchnode(exp,exp->_nodes,text);\r
+ if(res == NULL || res != exp->_eol)\r
+ return SQFalse;\r
+ return SQTrue;\r
+}\r
+\r
+SQBool sqstd_rex_searchrange(SQRex* exp,const SQChar* text_begin,const SQChar* text_end,const SQChar** out_begin, const SQChar** out_end)\r
+{\r
+ const SQChar *cur = NULL;\r
+ int node = exp->_first;\r
+ if(text_begin >= text_end) return SQFalse;\r
+ exp->_bol = text_begin;\r
+ exp->_eol = text_end;\r
+ do {\r
+ cur = text_begin;\r
+ while(node != -1) {\r
+ exp->_currsubexp = 0;\r
+ cur = sqstd_rex_matchnode(exp,&exp->_nodes[node],cur);\r
+ if(!cur)\r
+ break;\r
+ node = exp->_nodes[node].next;\r
+ }\r
+ *text_begin++;\r
+ } while(cur == NULL && text_begin != text_end);\r
+\r
+ if(cur == NULL)\r
+ return SQFalse;\r
+\r
+ --text_begin;\r
+\r
+ if(out_begin) *out_begin = text_begin;\r
+ if(out_end) *out_end = cur;\r
+ return SQTrue;\r
+}\r
+\r
+SQBool sqstd_rex_search(SQRex* exp,const SQChar* text, const SQChar** out_begin, const SQChar** out_end)\r
+{\r
+ return sqstd_rex_searchrange(exp,text,text + scstrlen(text),out_begin,out_end);\r
+}\r
+\r
+int sqstd_rex_getsubexpcount(SQRex* exp)\r
+{\r
+ return exp->_nsubexpr;\r
+}\r
+\r
+SQBool sqstd_rex_getsubexp(SQRex* exp, int n, SQRexMatch *subexp)\r
+{\r
+ if( n<0 || n >= exp->_nsubexpr) return SQFalse;\r
+ *subexp = exp->_matches[n];\r
+ return SQTrue;\r
+}\r
+\r