diff --git a/README b/README index ac7c2da..7c4f89b 100644 --- a/README +++ b/README @@ -32,7 +32,6 @@ so that the user does not need to waste time taking strlen() * Support for quoted chars in regex. Escapes in brackets. * Support for ^, $ assertions in regex. * Support for repetition operator {n} and {n,m} and {n,}. -- Note: cases with 0 are not handled, avoid them, they can easily be replaced. * Support for Unicode (UTF-8). * Unlike other engines, the output is byte level offset. (Which is more useful) * Support for non capture group ?: diff --git a/pike.c b/pike.c index 75d393d..3d689b3 100644 --- a/pike.c +++ b/pike.c @@ -268,7 +268,7 @@ static int compilecode(const char *re_loc, rcode *prog, int sizecode) } break; case '{':; - int maxcnt = 0, mincnt = 0, i = 0, size = PC - term; + int i, maxcnt = 0, mincnt = 0, size = PC - term, nojmp = 0; re++; while (isdigit((unsigned char) *re)) mincnt = mincnt * 10 + *re++ - '0'; @@ -279,12 +279,21 @@ static int compilecode(const char *re_loc, rcode *prog, int sizecode) EMIT(PC+1, REL(PC, PC - size)); PC += 2; maxcnt = mincnt; + nojmp = 1; } while (isdigit((unsigned char) *re)) maxcnt = maxcnt * 10 + *re++ - '0'; } else maxcnt = mincnt; - for (; i < mincnt-1; i++) { + if (!mincnt && !maxcnt) { + zcase: + INSERT_CODE(term, 2, PC); + EMIT(term, nojmp ? SPLIT : JMP); + EMIT(term + 1, REL(term, PC)); + term = PC; + break; + } + for (i = 0; i < mincnt-1; i++) { if (code) memcpy(&code[PC], &code[term], size*sizeof(int)); PC += size; @@ -296,6 +305,10 @@ static int compilecode(const char *re_loc, rcode *prog, int sizecode) memcpy(&code[PC], &code[term], size*sizeof(int)); PC += size; } + if (!mincnt && maxcnt) { + nojmp = 1; + goto zcase; + } break; case '?': if (PC == term) diff --git a/test.sh b/test.sh index b0ee306..2432ea0 100755 --- a/test.sh +++ b/test.sh @@ -1,6 +1,20 @@ #!/bin/sh regex="\ +a{0} +(aaaa){0,} +(aaaa){0,0} +(aaa+a){0,} +(aaaa){0,0}|a +(aaaa){0,0}|abc +(aaaa){0,}|bc +(aaaa){0,2}|bc +(aaaa){0,2}|bc +(aaaa){0,2}|bc +bc{0}|(aaaa){0,2} +(bc{0,})+|(aaaa){0,2} +(bc{0,}){3,5}|(aaaa){0,2} +(bc{0,}){3,5}|(aaaa){0,2} abc cde abc* @@ -195,6 +209,20 @@ aaaaa(aa)aa(aa(a)a)?aa (((?:(?:(?:ffffff(a)?ffff)+)+?)*)*)+ " input="\ +aaaaaaaaaa +aaaaaaaaaa +aaaaaaaaaa +aaaaaaaaaa +aaaaaaaaaa +aaaaaaaaaa +aaaaaaaaaa +aaaaaa +aaaa +aaa +ccc +bbbbccc +bbbbbccccc +bbbbbbccccc abcdef abcdef abdef @@ -389,6 +417,20 @@ fffffaffffffffff fffffffffffffffffffffffff " expect="\ +(0,0) +(0,8)(4,8) +(0,0)(?,?) +(0,10)(0,10) +(0,0)(?,?) +(0,0)(?,?) +(0,8)(4,8) +(0,4)(0,4) +(0,4)(0,4) +(0,0)(?,?) +(0,0)(?,?) +(0,7)(3,7)(?,?) +(0,10)(4,10)(?,?) +(0,5)(4,5)(?,?) (0,3) (2,5) (0,2)