Make the "already on list" check fast; add test script

This commit is contained in:
Kyryl Melekhin
2021-07-13 03:41:53 +00:00
parent c37ed995e3
commit 3d4b823a30
2 changed files with 42 additions and 41 deletions

62
pike.c
View File

@@ -45,15 +45,15 @@ enum /* rinst.opcode */
CLASSNOT, CLASSNOT,
NAMEDCLASS, NAMEDCLASS,
// Assert position // Assert position
BOL = 0x50, BOL,
EOL, EOL,
// Instructions which take relative offset as arg // Instructions which take relative offset as arg
JMP = 0x60, JMP,
SPLIT, SPLIT,
RSPLIT, RSPLIT,
// Other (special) instructions // Other (special) instructions
SAVE = 0x7e, SAVE,
MATCH = 0x7f, MATCH,
}; };
// Return codes for re_sizecode() and re_comp() // Return codes for re_sizecode() and re_comp()
@@ -497,40 +497,18 @@ int re_comp(rcode *prog, const char *re, int anchored)
return RE_SUCCESS; return RE_SUCCESS;
} }
void cleanmarks(rcode *prog) static void addthread(const int *pbeg, int *plist, int gen, rthreadlist *l,
{ int *pc, rsub *sub, const char *beg, const char *sp)
int *pc = prog->insts;
int *end = pc + prog->unilen;
while (pc < end) {
*pc &= 0x7f;
switch (*pc) {
case CLASS:
case CLASSNOT:
pc += pc[1] * 2;
case NAMEDCLASS:
case JMP:
case SPLIT:
case RSPLIT:
case SAVE:
case CHAR:
pc++;
break;
}
pc++;
}
}
static void addthread(rthreadlist *l, int *pc, rsub *sub, const char *beg, const char *sp)
{ {
int off; int off;
rec: rec:
if(*pc & 0x80) { if(plist[pc - pbeg] == gen) {
decref(sub); decref(sub);
return; // already on list return; // already on list
} }
*pc |= 0x80; plist[pc - pbeg] = gen;
switch(*pc & 0x7f) { switch(*pc) {
default: default:
l->t[l->n].sub = sub; l->t[l->n].sub = sub;
l->t[l->n++].pc = pc; l->t[l->n++].pc = pc;
@@ -542,14 +520,14 @@ static void addthread(rthreadlist *l, int *pc, rsub *sub, const char *beg, const
case SPLIT: case SPLIT:
off = pc[1]; off = pc[1];
sub->ref++; sub->ref++;
addthread(l, pc+2, sub, beg, sp); addthread(pbeg, plist, gen, l, pc+2, sub, beg, sp);
pc += 2 + off; pc += 2 + off;
goto rec; goto rec;
case RSPLIT: case RSPLIT:
off = pc[1]; off = pc[1];
pc += 2; pc += 2;
sub->ref++; sub->ref++;
addthread(l, pc + off, sub, beg, sp); addthread(pbeg, plist, gen, l, pc + off, sub, beg, sp);
goto rec; goto rec;
case SAVE: case SAVE:
off = pc[1]; off = pc[1];
@@ -569,12 +547,14 @@ static void addthread(rthreadlist *l, int *pc, rsub *sub, const char *beg, const
int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp) int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
{ {
int i, *pc; int i, gen, *pc;
const char *sp; const char *sp;
int plist[prog->unilen];
rsub *sub, *matched = NULL; rsub *sub, *matched = NULL;
rthreadlist _clist[1+prog->len]; rthreadlist _clist[1+prog->len];
rthreadlist _nlist[1+prog->len]; rthreadlist _nlist[1+prog->len];
rthreadlist *clist = _clist, *nlist = _nlist, *tmp; rthreadlist *clist = _clist, *nlist = _nlist, *tmp;
memset(plist, 0, prog->unilen*sizeof(plist[0]));
memset(clist, 0, (1+prog->len)*sizeof(rthread)); memset(clist, 0, (1+prog->len)*sizeof(rthread));
memset(nlist, 0, (1+prog->len)*sizeof(rthread)); memset(nlist, 0, (1+prog->len)*sizeof(rthread));
@@ -586,22 +566,22 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
for(i=0; i<nsubp; i++) for(i=0; i<nsubp; i++)
sub->sub[i] = NULL; sub->sub[i] = NULL;
cleanmarks(prog); gen = 1;
addthread(clist, prog->insts, sub, s, s); addthread(prog->insts, plist, gen, clist, prog->insts, sub, s, s);
for(sp=s;; sp++) { for(sp=s;; sp++) {
if(clist->n == 0) if(clist->n == 0)
break; break;
cleanmarks(prog); gen++;
for(i=0; i<clist->n; i++) { for(i=0; i<clist->n; i++) {
pc = clist->t[i].pc; pc = clist->t[i].pc;
sub = clist->t[i].sub; sub = clist->t[i].sub;
if (inst_is_consumer(*pc & 0x7f) && !*sp) { if (inst_is_consumer(*pc) && !*sp) {
// If we need to match a character, but there's none left, // If we need to match a character, but there's none left,
// it's fail (we don't schedule current thread for continuation) // it's fail (we don't schedule current thread for continuation)
decref(sub); decref(sub);
continue; continue;
} }
switch(*pc++ & 0x7f) { switch(*pc++) {
case CHAR: case CHAR:
if(*sp != *pc++) { if(*sp != *pc++) {
decref(sub); decref(sub);
@@ -609,7 +589,7 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
} }
case ANY: case ANY:
addthread: addthread:
addthread(nlist, pc, sub, s, sp+1); addthread(prog->insts, plist, gen, nlist, pc, sub, s, sp+1);
break; break;
case CLASS: case CLASS:
case CLASSNOT: case CLASSNOT:
@@ -670,7 +650,7 @@ int main(int argc, char *argv[])
if(sub[k-1]) if(sub[k-1])
break; break;
for(int l=0; l<sub_els; l+=2) { for(int l=0; l<sub_els; l+=2) {
printf(" ("); printf("(");
if(sub[l] == NULL) if(sub[l] == NULL)
printf("?"); printf("?");
else else

21
test.sh Executable file
View File

@@ -0,0 +1,21 @@
# Regression tests for the regex matcher built as ./a.out.
#
# The three lists below are parallel and space-separated: test N runs the
# Nth regex against the Nth input, and the last line printed by ./a.out
# (with runs of whitespace collapsed) must equal the Nth expected string.
# Because the lists are split on spaces, no individual entry may contain one.
regex="abc cde (a|b)|c"
input="abcdef abcdef abc"
expect="(0,3) (2,5) (0,1)(0,1)"
c=1
# printf instead of echo, and read -r instead of read, so that a backslash
# inside a pattern reaches ./a.out unchanged.
printf '%s\n' "$regex" | tr ' ' '\n' | while read -r re; do
	# Select the c-th whitespace-separated field of each parallel list.
	inp=$(printf '%s\n' "$input" | awk -v c="$c" '{print $c}')
	exp=$(printf '%s\n' "$expect" | awk -v c="$c" '{print $c}')
	# Keep only the final output line; the unquoted inner substitution
	# deliberately collapses surrounding/repeated whitespace.
	var=$(echo $(./a.out "$re" "$inp" | awk 'END{print}'))
	if [ "$exp" != "$var" ]; then
		echo "fail test$c regex:$re input:$inp expect:$exp output:$var"
		exit 1
	fi
	c=$((c+1))
# The loop runs in the pipeline's subshell, so the exit above only ends that
# subshell; propagate the failure to the script itself.
done || exit 1