pike.c: handle 0 case in repetition
This commit is contained in:
1
README
1
README
@@ -32,7 +32,6 @@ so that the user does not need to waste time taking strlen()
|
|||||||
* Support for quoted chars in regex. Escapes in brackets.
|
* Support for quoted chars in regex. Escapes in brackets.
|
||||||
* Support for ^, $ assertions in regex.
|
* Support for ^, $ assertions in regex.
|
||||||
* Support for repetition operator {n} and {n,m} and {n,}.
|
* Support for repetition operator {n} and {n,m} and {n,}.
|
||||||
- Note: cases with 0 are not handled, avoid them, they can easily be replaced.
|
|
||||||
* Support for Unicode (UTF-8).
|
* Support for Unicode (UTF-8).
|
||||||
* Unlike other engines, the output consists of byte-level offsets (which is more useful).
|
* Unlike other engines, the output consists of byte-level offsets (which is more useful).
|
||||||
* Support for non-capturing groups (?:).
|
* Support for non-capturing groups (?:).
|
||||||
|
|||||||
17
pike.c
17
pike.c
@@ -268,7 +268,7 @@ static int compilecode(const char *re_loc, rcode *prog, int sizecode)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case '{':;
|
case '{':;
|
||||||
int maxcnt = 0, mincnt = 0, i = 0, size = PC - term;
|
int i, maxcnt = 0, mincnt = 0, size = PC - term, nojmp = 0;
|
||||||
re++;
|
re++;
|
||||||
while (isdigit((unsigned char) *re))
|
while (isdigit((unsigned char) *re))
|
||||||
mincnt = mincnt * 10 + *re++ - '0';
|
mincnt = mincnt * 10 + *re++ - '0';
|
||||||
@@ -279,12 +279,21 @@ static int compilecode(const char *re_loc, rcode *prog, int sizecode)
|
|||||||
EMIT(PC+1, REL(PC, PC - size));
|
EMIT(PC+1, REL(PC, PC - size));
|
||||||
PC += 2;
|
PC += 2;
|
||||||
maxcnt = mincnt;
|
maxcnt = mincnt;
|
||||||
|
nojmp = 1;
|
||||||
}
|
}
|
||||||
while (isdigit((unsigned char) *re))
|
while (isdigit((unsigned char) *re))
|
||||||
maxcnt = maxcnt * 10 + *re++ - '0';
|
maxcnt = maxcnt * 10 + *re++ - '0';
|
||||||
} else
|
} else
|
||||||
maxcnt = mincnt;
|
maxcnt = mincnt;
|
||||||
for (; i < mincnt-1; i++) {
|
if (!mincnt && !maxcnt) {
|
||||||
|
zcase:
|
||||||
|
INSERT_CODE(term, 2, PC);
|
||||||
|
EMIT(term, nojmp ? SPLIT : JMP);
|
||||||
|
EMIT(term + 1, REL(term, PC));
|
||||||
|
term = PC;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
for (i = 0; i < mincnt-1; i++) {
|
||||||
if (code)
|
if (code)
|
||||||
memcpy(&code[PC], &code[term], size*sizeof(int));
|
memcpy(&code[PC], &code[term], size*sizeof(int));
|
||||||
PC += size;
|
PC += size;
|
||||||
@@ -296,6 +305,10 @@ static int compilecode(const char *re_loc, rcode *prog, int sizecode)
|
|||||||
memcpy(&code[PC], &code[term], size*sizeof(int));
|
memcpy(&code[PC], &code[term], size*sizeof(int));
|
||||||
PC += size;
|
PC += size;
|
||||||
}
|
}
|
||||||
|
if (!mincnt && maxcnt) {
|
||||||
|
nojmp = 1;
|
||||||
|
goto zcase;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case '?':
|
case '?':
|
||||||
if (PC == term)
|
if (PC == term)
|
||||||
|
|||||||
42
test.sh
42
test.sh
@@ -1,6 +1,20 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
regex="\
|
regex="\
|
||||||
|
a{0}
|
||||||
|
(aaaa){0,}
|
||||||
|
(aaaa){0,0}
|
||||||
|
(aaa+a){0,}
|
||||||
|
(aaaa){0,0}|a
|
||||||
|
(aaaa){0,0}|abc
|
||||||
|
(aaaa){0,}|bc
|
||||||
|
(aaaa){0,2}|bc
|
||||||
|
(aaaa){0,2}|bc
|
||||||
|
(aaaa){0,2}|bc
|
||||||
|
bc{0}|(aaaa){0,2}
|
||||||
|
(bc{0,})+|(aaaa){0,2}
|
||||||
|
(bc{0,}){3,5}|(aaaa){0,2}
|
||||||
|
(bc{0,}){3,5}|(aaaa){0,2}
|
||||||
abc
|
abc
|
||||||
cde
|
cde
|
||||||
abc*
|
abc*
|
||||||
@@ -195,6 +209,20 @@ aaaaa(aa)aa(aa(a)a)?aa
|
|||||||
(((?:(?:(?:ffffff(a)?ffff)+)+?)*)*)+
|
(((?:(?:(?:ffffff(a)?ffff)+)+?)*)*)+
|
||||||
"
|
"
|
||||||
input="\
|
input="\
|
||||||
|
aaaaaaaaaa
|
||||||
|
aaaaaaaaaa
|
||||||
|
aaaaaaaaaa
|
||||||
|
aaaaaaaaaa
|
||||||
|
aaaaaaaaaa
|
||||||
|
aaaaaaaaaa
|
||||||
|
aaaaaaaaaa
|
||||||
|
aaaaaa
|
||||||
|
aaaa
|
||||||
|
aaa
|
||||||
|
ccc
|
||||||
|
bbbbccc
|
||||||
|
bbbbbccccc
|
||||||
|
bbbbbbccccc
|
||||||
abcdef
|
abcdef
|
||||||
abcdef
|
abcdef
|
||||||
abdef
|
abdef
|
||||||
@@ -389,6 +417,20 @@ fffffaffffffffff
|
|||||||
fffffffffffffffffffffffff
|
fffffffffffffffffffffffff
|
||||||
"
|
"
|
||||||
expect="\
|
expect="\
|
||||||
|
(0,0)
|
||||||
|
(0,8)(4,8)
|
||||||
|
(0,0)(?,?)
|
||||||
|
(0,10)(0,10)
|
||||||
|
(0,0)(?,?)
|
||||||
|
(0,0)(?,?)
|
||||||
|
(0,8)(4,8)
|
||||||
|
(0,4)(0,4)
|
||||||
|
(0,4)(0,4)
|
||||||
|
(0,0)(?,?)
|
||||||
|
(0,0)(?,?)
|
||||||
|
(0,7)(3,7)(?,?)
|
||||||
|
(0,10)(4,10)(?,?)
|
||||||
|
(0,5)(4,5)(?,?)
|
||||||
(0,3)
|
(0,3)
|
||||||
(2,5)
|
(2,5)
|
||||||
(0,2)
|
(0,2)
|
||||||
|
|||||||
Reference in New Issue
Block a user