Better handling of nonregular expressions

This commit is contained in:
Kyryl Melekhin
2022-04-20 15:52:19 +00:00
parent f0da1db9cc
commit 8bee489e34
2 changed files with 34 additions and 10 deletions

13
pike.c
View File

@@ -422,7 +422,7 @@ int re_comp(rcode *prog, const char *re, int nsubs)
prog->splits = (scnt - SPLIT) / 2;
prog->len = icnt + 2;
prog->presub = sizeof(rsub)+(sizeof(char*) * (nsubs + 1) * 2);
prog->sub = prog->presub * (prog->len - prog->splits + 4);
prog->sub = prog->presub * (prog->len - prog->splits + 3);
prog->sparsesz = scnt;
return RE_SUCCESS;
}
@@ -431,7 +431,8 @@ int re_comp(rcode *prog, const char *re, int nsubs)
if (freesub) \
{ s1 = freesub; freesub = s1->freesub; copy } \
else \
{ s1 = (rsub*)&nsubs[suboff+=rsubsize]; init } \
{ if (suboff == prog->sub) suboff = 0; \
s1 = (rsub*)&nsubs[suboff]; suboff += rsubsize; init } \
#define decref(csub) \
if (--csub->ref == 0) { \
@@ -562,7 +563,7 @@ clistidx = nlistidx; \
int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
{
int rsubsize = prog->presub, suboff = rsubsize;
int rsubsize = prog->presub, suboff = 0;
int spc, i, j, c, *npc, osubp = nsubp * sizeof(char*);
int si = 0, clistidx = 0, nlistidx, mcont = MATCH;
const char *sp = s, *_sp = s;
@@ -595,13 +596,11 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
matched:
nlist[nlistidx++].pc = &mcont;
if (npc != &mcont) {
if (matched) {
if (matched)
decref(matched)
suboff = 0;
}
matched = nsub;
}
if ((sp == _sp || nlistidx == 1) && matched->sub[nsubp/2]) {
if (sp == _sp || nlistidx == 1) {
for (i = 0, j = i; i < nsubp; i+=2, j++) {
subp[i] = matched->sub[j];
subp[i+1] = matched->sub[nsubp / 2 + j];

25
test.sh
View File

@@ -156,7 +156,10 @@ aaaaa(aa)aa(aa(a)a)?aa
\\\\\\\\\\\\\\\\<
[^a]*b
^(.+):([0-9]+):(.+)
^(.+):([0-9]+).(.+)
^(.+):([0-9]+).(.+){2,5}
^(.+):([0-9]+):(.+)
^(.+):([0-9]+).(.+)
^(.+):([0-9]+):(.+)
^(.+):([0-9]+):(.+)(.+)
(.*):([0-9]*):(.*)
@@ -164,6 +167,10 @@ aaaaa(aa)aa(aa(a)a)?aa
(((aaa+)+)bb*)(.*):([0-9]*):(.+)
^(.+):([0-9]+):(.{8})+
^(.+):([0-9]+):((aaaa)|(.+))\"
[0-9]+.(.*)
[0-9]+.(.*)
([0-9])+.(.*)
(([0-9])+)(.)(.*)
"
input="\
abcdef
@@ -321,14 +328,21 @@ xabcx
\\\\\\\\<
hhagbdbdbjsjjjda..b
userspace-api/media/v4l/vbi_625.svg:98: :34bstroke-linejoin:m;stroke-miteit:10;stroke-day:n;se-ty:1\"
userspace-api/media/v4l/vbi_625.svg:98: :34bstroke-linejoin:m;stroke-miteit:10;stroke-day:n;se-ty:1\"
userspace-api/media/v4l/vbi_625.svg:98: :34bstroke-linejoin:m;stroke-miteit:10;stroke-day:n;se-ty:1\"
h:98: :3234434butt;stroke-linejoin:miter;stroke-miterlimit:10;stroke-dasharray:none;stroke-opacity:1\"
h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\"
h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\"
h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\"
h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\"
h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\"
aaaaabb grt:123:....
h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\"
h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\"
650-253-0001
650-253-000123434-45551221
650-253-000123434-45551221
650-253-000123434-455512213224hsaqer
"
expect="\
(0,3)
@@ -486,7 +500,10 @@ expect="\
(2,5)
(3,9)
(0,102)(0,35)(36,38)(39,102)
(0,102)(0,77)(78,80)(81,102)
(0,102)(0,77)(78,80)(101,102)
(0,103)(0,1)(2,4)(5,103)
(0,93)(0,89)(90,91)(92,93)
(0,93)(0,1)(2,4)(5,93)
(0,93)(0,1)(2,4)(5,92)(92,93)
(0,93)(0,1)(2,4)(5,93)
@@ -494,6 +511,10 @@ expect="\
(0,20)(0,7)(0,5)(0,5)(7,11)(12,15)(16,20)
(0,93)(0,1)(2,4)(85,93)
(0,93)(0,1)(2,4)(5,92)(?,?)(5,92)
(0,12)(4,12)
(0,26)(4,26)
(0,26)(2,3)(4,26)
(0,36)(0,3)(2,3)(3,4)(4,36)
(0,0)
"
@@ -512,8 +533,12 @@ printf '%s\n' "$regex" | while read re; do
var1=$(printf '%s\n' "$var" | tail -1)
if [ ! "$exp" = "$var1" ]; then
printf '%s\n' "fail test$c regex:$re input:$inp expect:$exp output:$var1"
# Abort on the first failing test unless the script's first argument is 1,
# in which case execution falls through and the remaining tests still run.
# Uses the POSIX `=` string comparison (not the bash-only `==`), matching
# the comparison style of the enclosing check.
if [ ! "$1" = 1 ]; then
exit 1
fi
c=$((c+1))
continue
fi
time=$(printf '%s\n' "$var" | tail -2 | head -n1)
printf '%s\n' "pass test$c regex:$re input:$inp expect:$exp output:$var1 $time"
c=$((c+1))