better deal with nonregular exps

This commit is contained in:
Kyryl Melekhin
2022-04-20 15:52:19 +00:00
parent f0da1db9cc
commit 8bee489e34
2 changed files with 34 additions and 10 deletions

15
pike.c
View File

@@ -1,4 +1,4 @@
/* /*
Copyright 2007-2009 Russ Cox. All Rights Reserved. Copyright 2007-2009 Russ Cox. All Rights Reserved.
Copyright 2020-2021 Kyryl Melekhin. All Rights Reserved. Copyright 2020-2021 Kyryl Melekhin. All Rights Reserved.
Use of this source code is governed by a BSD-style Use of this source code is governed by a BSD-style
@@ -422,7 +422,7 @@ int re_comp(rcode *prog, const char *re, int nsubs)
prog->splits = (scnt - SPLIT) / 2; prog->splits = (scnt - SPLIT) / 2;
prog->len = icnt + 2; prog->len = icnt + 2;
prog->presub = sizeof(rsub)+(sizeof(char*) * (nsubs + 1) * 2); prog->presub = sizeof(rsub)+(sizeof(char*) * (nsubs + 1) * 2);
prog->sub = prog->presub * (prog->len - prog->splits + 4); prog->sub = prog->presub * (prog->len - prog->splits + 3);
prog->sparsesz = scnt; prog->sparsesz = scnt;
return RE_SUCCESS; return RE_SUCCESS;
} }
@@ -431,7 +431,8 @@ int re_comp(rcode *prog, const char *re, int nsubs)
if (freesub) \ if (freesub) \
{ s1 = freesub; freesub = s1->freesub; copy } \ { s1 = freesub; freesub = s1->freesub; copy } \
else \ else \
{ s1 = (rsub*)&nsubs[suboff+=rsubsize]; init } \ { if (suboff == prog->sub) suboff = 0; \
s1 = (rsub*)&nsubs[suboff]; suboff += rsubsize; init } \
#define decref(csub) \ #define decref(csub) \
if (--csub->ref == 0) { \ if (--csub->ref == 0) { \
@@ -562,7 +563,7 @@ clistidx = nlistidx; \
int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp) int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
{ {
int rsubsize = prog->presub, suboff = rsubsize; int rsubsize = prog->presub, suboff = 0;
int spc, i, j, c, *npc, osubp = nsubp * sizeof(char*); int spc, i, j, c, *npc, osubp = nsubp * sizeof(char*);
int si = 0, clistidx = 0, nlistidx, mcont = MATCH; int si = 0, clistidx = 0, nlistidx, mcont = MATCH;
const char *sp = s, *_sp = s; const char *sp = s, *_sp = s;
@@ -595,13 +596,11 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
matched: matched:
nlist[nlistidx++].pc = &mcont; nlist[nlistidx++].pc = &mcont;
if (npc != &mcont) { if (npc != &mcont) {
if (matched) { if (matched)
decref(matched) decref(matched)
suboff = 0;
}
matched = nsub; matched = nsub;
} }
if ((sp == _sp || nlistidx == 1) && matched->sub[nsubp/2]) { if (sp == _sp || nlistidx == 1) {
for (i = 0, j = i; i < nsubp; i+=2, j++) { for (i = 0, j = i; i < nsubp; i+=2, j++) {
subp[i] = matched->sub[j]; subp[i] = matched->sub[j];
subp[i+1] = matched->sub[nsubp / 2 + j]; subp[i+1] = matched->sub[nsubp / 2 + j];

29
test.sh
View File

@@ -156,7 +156,10 @@ aaaaa(aa)aa(aa(a)a)?aa
\\\\\\\\\\\\\\\\< \\\\\\\\\\\\\\\\<
[^a]*b [^a]*b
^(.+):([0-9]+):(.+) ^(.+):([0-9]+):(.+)
^(.+):([0-9]+).(.+)
^(.+):([0-9]+).(.+){2,5}
^(.+):([0-9]+):(.+) ^(.+):([0-9]+):(.+)
^(.+):([0-9]+).(.+)
^(.+):([0-9]+):(.+) ^(.+):([0-9]+):(.+)
^(.+):([0-9]+):(.+)(.+) ^(.+):([0-9]+):(.+)(.+)
(.*):([0-9]*):(.*) (.*):([0-9]*):(.*)
@@ -164,6 +167,10 @@ aaaaa(aa)aa(aa(a)a)?aa
(((aaa+)+)bb*)(.*):([0-9]*):(.+) (((aaa+)+)bb*)(.*):([0-9]*):(.+)
^(.+):([0-9]+):(.{8})+ ^(.+):([0-9]+):(.{8})+
^(.+):([0-9]+):((aaaa)|(.+))\" ^(.+):([0-9]+):((aaaa)|(.+))\"
[0-9]+.(.*)
[0-9]+.(.*)
([0-9])+.(.*)
(([0-9])+)(.)(.*)
" "
input="\ input="\
abcdef abcdef
@@ -321,14 +328,21 @@ xabcx
\\\\\\\\< \\\\\\\\<
hhagbdbdbjsjjjda..b hhagbdbdbjsjjjda..b
userspace-api/media/v4l/vbi_625.svg:98: :34bstroke-linejoin:m;stroke-miteit:10;stroke-day:n;se-ty:1\" userspace-api/media/v4l/vbi_625.svg:98: :34bstroke-linejoin:m;stroke-miteit:10;stroke-day:n;se-ty:1\"
userspace-api/media/v4l/vbi_625.svg:98: :34bstroke-linejoin:m;stroke-miteit:10;stroke-day:n;se-ty:1\"
userspace-api/media/v4l/vbi_625.svg:98: :34bstroke-linejoin:m;stroke-miteit:10;stroke-day:n;se-ty:1\"
h:98: :3234434butt;stroke-linejoin:miter;stroke-miterlimit:10;stroke-dasharray:none;stroke-opacity:1\" h:98: :3234434butt;stroke-linejoin:miter;stroke-miterlimit:10;stroke-dasharray:none;stroke-opacity:1\"
h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\" h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\"
h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\" h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\"
h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\" h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\"
h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\" h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\"
h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\"
aaaaabb grt:123:.... aaaaabb grt:123:....
h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\" h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\"
h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\" h:98: :3234utt;strokeliin:miter;stroke-mirlimit:10;stroke-dasharray:none;stroke-opacity:1n\"
650-253-0001
650-253-000123434-45551221
650-253-000123434-45551221
650-253-000123434-455512213224hsaqer
" "
expect="\ expect="\
(0,3) (0,3)
@@ -486,7 +500,10 @@ expect="\
(2,5) (2,5)
(3,9) (3,9)
(0,102)(0,35)(36,38)(39,102) (0,102)(0,35)(36,38)(39,102)
(0,102)(0,77)(78,80)(81,102)
(0,102)(0,77)(78,80)(101,102)
(0,103)(0,1)(2,4)(5,103) (0,103)(0,1)(2,4)(5,103)
(0,93)(0,89)(90,91)(92,93)
(0,93)(0,1)(2,4)(5,93) (0,93)(0,1)(2,4)(5,93)
(0,93)(0,1)(2,4)(5,92)(92,93) (0,93)(0,1)(2,4)(5,92)(92,93)
(0,93)(0,1)(2,4)(5,93) (0,93)(0,1)(2,4)(5,93)
@@ -494,6 +511,10 @@ expect="\
(0,20)(0,7)(0,5)(0,5)(7,11)(12,15)(16,20) (0,20)(0,7)(0,5)(0,5)(7,11)(12,15)(16,20)
(0,93)(0,1)(2,4)(85,93) (0,93)(0,1)(2,4)(85,93)
(0,93)(0,1)(2,4)(5,92)(?,?)(5,92) (0,93)(0,1)(2,4)(5,92)(?,?)(5,92)
(0,12)(4,12)
(0,26)(4,26)
(0,26)(2,3)(4,26)
(0,36)(0,3)(2,3)(3,4)(4,36)
(0,0) (0,0)
" "
@@ -507,12 +528,16 @@ printf '%s\n' "$regex" | while read re; do
exp=$(printf '%s\n' "$expect" | awk -v c=$c 'BEGIN{ RS = "" ; FS = "\n" }{print $c}') exp=$(printf '%s\n' "$expect" | awk -v c=$c 'BEGIN{ RS = "" ; FS = "\n" }{print $c}')
var=$(./a.out "$re" "$inp") var=$(./a.out "$re" "$inp")
if [ "$1" ]; then if [ "$1" ]; then
printf '%s\n' "$var" printf '%s\n' "$var"
fi fi
var1=$(printf '%s\n' "$var" | tail -1) var1=$(printf '%s\n' "$var" | tail -1)
if [ ! "$exp" = "$var1" ]; then if [ ! "$exp" = "$var1" ]; then
printf '%s\n' "fail test$c regex:$re input:$inp expect:$exp output:$var1" printf '%s\n' "fail test$c regex:$re input:$inp expect:$exp output:$var1"
exit 1 if [ ! "$1" == 1 ]; then
exit 1
fi
c=$((c+1))
continue
fi fi
time=$(printf '%s\n' "$var" | tail -2 | head -n1) time=$(printf '%s\n' "$var" | tail -2 | head -n1)
printf '%s\n' "pass test$c regex:$re input:$inp expect:$exp output:$var1 $time" printf '%s\n' "pass test$c regex:$re input:$inp expect:$exp output:$var1 $time"