pike.c: handle zero counts in the repetition operator ({0}, {0,m}, {0,})

This commit is contained in:
Kyryl Melekhin
2025-11-01 20:44:03 +00:00
parent 36c27822f5
commit a43ab17911
3 changed files with 57 additions and 3 deletions

1
README
View File

@@ -32,7 +32,6 @@ so that the user does not need to waste time taking strlen()
* Support for quoted chars in regex. Escapes in brackets.
* Support for ^, $ assertions in regex.
* Support for repetition operator {n} and {n,m} and {n,}.
- Note: cases with 0 are not handled; avoid them, as they can easily be replaced.
* Support for Unicode (UTF-8).
* Unlike other engines, the output is a byte-level offset (which is more useful).
* Support for non-capture groups (?:).

17
pike.c
View File

@@ -268,7 +268,7 @@ static int compilecode(const char *re_loc, rcode *prog, int sizecode)
} }
break; break;
case '{':; case '{':;
int maxcnt = 0, mincnt = 0, i = 0, size = PC - term; int i, maxcnt = 0, mincnt = 0, size = PC - term, nojmp = 0;
re++; re++;
while (isdigit((unsigned char) *re)) while (isdigit((unsigned char) *re))
mincnt = mincnt * 10 + *re++ - '0'; mincnt = mincnt * 10 + *re++ - '0';
@@ -279,12 +279,21 @@ static int compilecode(const char *re_loc, rcode *prog, int sizecode)
EMIT(PC+1, REL(PC, PC - size)); EMIT(PC+1, REL(PC, PC - size));
PC += 2; PC += 2;
maxcnt = mincnt; maxcnt = mincnt;
nojmp = 1;
} }
while (isdigit((unsigned char) *re)) while (isdigit((unsigned char) *re))
maxcnt = maxcnt * 10 + *re++ - '0'; maxcnt = maxcnt * 10 + *re++ - '0';
} else } else
maxcnt = mincnt; maxcnt = mincnt;
for (; i < mincnt-1; i++) { if (!mincnt && !maxcnt) {
zcase:
INSERT_CODE(term, 2, PC);
EMIT(term, nojmp ? SPLIT : JMP);
EMIT(term + 1, REL(term, PC));
term = PC;
break;
}
for (i = 0; i < mincnt-1; i++) {
if (code) if (code)
memcpy(&code[PC], &code[term], size*sizeof(int)); memcpy(&code[PC], &code[term], size*sizeof(int));
PC += size; PC += size;
@@ -296,6 +305,10 @@ static int compilecode(const char *re_loc, rcode *prog, int sizecode)
memcpy(&code[PC], &code[term], size*sizeof(int)); memcpy(&code[PC], &code[term], size*sizeof(int));
PC += size; PC += size;
} }
if (!mincnt && maxcnt) {
nojmp = 1;
goto zcase;
}
break; break;
case '?': case '?':
if (PC == term) if (PC == term)

42
test.sh
View File

@@ -1,6 +1,20 @@
#!/bin/sh #!/bin/sh
regex="\ regex="\
a{0}
(aaaa){0,}
(aaaa){0,0}
(aaa+a){0,}
(aaaa){0,0}|a
(aaaa){0,0}|abc
(aaaa){0,}|bc
(aaaa){0,2}|bc
(aaaa){0,2}|bc
(aaaa){0,2}|bc
bc{0}|(aaaa){0,2}
(bc{0,})+|(aaaa){0,2}
(bc{0,}){3,5}|(aaaa){0,2}
(bc{0,}){3,5}|(aaaa){0,2}
abc abc
cde cde
abc* abc*
@@ -195,6 +209,20 @@ aaaaa(aa)aa(aa(a)a)?aa
(((?:(?:(?:ffffff(a)?ffff)+)+?)*)*)+ (((?:(?:(?:ffffff(a)?ffff)+)+?)*)*)+
" "
input="\ input="\
aaaaaaaaaa
aaaaaaaaaa
aaaaaaaaaa
aaaaaaaaaa
aaaaaaaaaa
aaaaaaaaaa
aaaaaaaaaa
aaaaaa
aaaa
aaa
ccc
bbbbccc
bbbbbccccc
bbbbbbccccc
abcdef abcdef
abcdef abcdef
abdef abdef
@@ -389,6 +417,20 @@ fffffaffffffffff
fffffffffffffffffffffffff fffffffffffffffffffffffff
" "
expect="\ expect="\
(0,0)
(0,8)(4,8)
(0,0)(?,?)
(0,10)(0,10)
(0,0)(?,?)
(0,0)(?,?)
(0,8)(4,8)
(0,4)(0,4)
(0,4)(0,4)
(0,0)(?,?)
(0,0)(?,?)
(0,7)(3,7)(?,?)
(0,10)(4,10)(?,?)
(0,5)(4,5)(?,?)
(0,3) (0,3)
(2,5) (2,5)
(0,2) (0,2)