more ambiguous input testing, fixes

This commit is contained in:
Kyryl Melekhin
2021-10-21 16:35:33 +00:00
parent f038068dbb
commit 91e630abe9
2 changed files with 60 additions and 4 deletions

16
pike.c
View File

@@ -92,6 +92,7 @@ typedef struct rsub rsub;
/*
 * Reference-counted record of submatch pointers used by the matcher.
 * Records whose count drops to zero are chained through `freesub`
 * so they can be reused instead of carving out a new one.
 */
struct rsub
{
/* reference count; decref() moves the record to the free list when it reaches 0 */
int ref;
/* next record on the free list (written by decref(), read by newsub()) */
rsub *freesub;
/* flexible array member; re_pikevm() sizes each record for nsubp pointers
 * (presumably the capture-group boundaries -- confirm against the matcher) */
const char *sub[];
};
@@ -449,13 +450,13 @@ int re_comp(rcode *prog, const char *re, int nsubs)
#define newsub(init, copy) \
if (freesub) \
{ s1 = freesub; freesub = (rsub*)s1->sub[0]; copy } \
{ s1 = freesub; freesub = s1->freesub; copy } \
else \
{ s1 = (rsub*)&nsubs[suboff+=rsubsize]; init } \
#define decref(csub) \
if (--csub->ref == 0) { \
csub->sub[0] = (char*)freesub; \
csub->freesub = freesub; \
freesub = csub; \
} \
@@ -470,6 +471,10 @@ plist[plistidx++] = npc; \
#define onclist(nn) \
#define endnlist() if (*npc == MATCH) nmatch = 1; \
#define endclist() \
#define fastrec(nn, list, listidx) \
nsub->ref++; \
if (*npc < WBEG) { \
@@ -535,6 +540,7 @@ case EOL: \
nsub = subs[i]; \
goto rec##nn; \
} \
end##list() \
continue; \
} \
next##nn: \
@@ -563,7 +569,7 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
{
int rsubsize = sizeof(rsub)+(sizeof(char*)*nsubp);
int i, j, c, suboff = rsubsize, *npc, osubp = nsubp * sizeof(char*);
int clistidx = 0, nlistidx = 0, plistidx = 0;
int clistidx = 0, nlistidx, plistidx, nmatch;
const char *sp = s, *_sp = s;
int *insts = prog->insts;
int *pcs[prog->splits], *plist[prog->splits];
@@ -576,6 +582,7 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
for (;; sp = _sp) {
uc_len(i, sp) uc_code(c, sp)
_sp = sp+i;
nlistidx = 0; plistidx = 0; nmatch = 0;
for (i = 0; i < clistidx; i++) {
npc = clist[i].pc;
nsub = clist[i].sub;
@@ -585,6 +592,8 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
break;
case ANY:
addthread:
if (nmatch)
break;
addthread(2, nlist, nlistidx)
case CLASS:
if (!re_classmatch(npc, c))
@@ -608,7 +617,6 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
clist = nlist;
nlist = tmp;
clistidx = nlistidx;
nlistidx = 0; plistidx = 0;
if (!matched) {
jmp_start:
newsub(memset(s1->sub, 0, osubp);, /*nop*/)

48
test.sh
View File

@@ -124,6 +124,22 @@ aaaaa(aa)aa(aa(a)a)?aa
.{5}
.{10,15}
(a(abc)+){3,}
(aa(aa)|a(a)a|a){3,4}
(aa(aa)|a(a)a|a){3,4}
(aa(aa)|a(a)a|a){3,4}
(aa(aa)|a(a)a|a){3,4}
(aa(aa)|a(a)a|a){3,4}
(aa(aa)|a(a)a|a){3,4}
(aa(aa)|a(a)a|a){3,4}
(a(a)(aa)|aaa|a){3,4}
(a(a)(aa)|aaa|a){6}
(a(a)(aa)|aaa|a){5,8}
(a(a)(aa)|(aa)a|(a)){9}
(a(a)(aa)|(aa)a|(a)){10}
(a(a)(aa)|(aa)a|(a)){11}
(a(a)a|(a)|a(aa)|aa){2,5}
((a)a|a(aa)|(aaaa)|(a*)){3,10}
((a)a|a(aa)|(aaaa)|(a+)){3,10}
"
input="\
abcdef
@@ -249,6 +265,22 @@ OBJ = \$(SRC:.c=.o)
рврыр
рврырdhsjhh
aabcabcaabcaabc
aaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaa
aaaaaaaaaaaaaa
aaaaaaaaaaaaa
aaaaaaaaaaaa
aaaaaaaaaaa
aaaaaaaaaa
aaaaaaaaaa
aaaaaaaaaa
aaaaaaaaaa
aaaaaaaaaa
aaaaaaaaaa
aaaaaaaaaa
aaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaa
"
expect="\
(0,3)
@@ -374,6 +406,22 @@ expect="\
(0,10)
(0,16)
(0,15)(11,15)(12,15)
(0,16)(12,16)(14,16)(?,?)
(0,15)(12,15)(10,12)(13,14)
(0,13)(12,13)(10,12)(?,?)
(0,13)(12,13)(10,12)(?,?)
(0,12)(8,12)(10,12)(?,?)
(0,11)(8,11)(6,8)(9,10)
(0,10)(9,10)(6,8)(?,?)
(0,10)(9,10)(5,6)(6,8)
(0,9)(8,9)(1,2)(2,4)
(0,10)(9,10)(1,2)(2,4)
(0,9)(8,9)(?,?)(?,?)(?,?)(8,9)
(0,10)(9,10)(?,?)(?,?)(?,?)(9,10)
-nomatch-
(0,13)(12,13)(10,11)(12,13)(?,?)
(0,17)(17,17)(14,15)(?,?)(?,?)(17,17)
(0,17)(16,17)(14,15)(?,?)(?,?)(16,17)
(0,0)
"