fast on list, add test script
This commit is contained in:
62
pike.c
62
pike.c
@@ -45,15 +45,15 @@ enum /* rinst.opcode */
|
|||||||
CLASSNOT,
|
CLASSNOT,
|
||||||
NAMEDCLASS,
|
NAMEDCLASS,
|
||||||
// Assert position
|
// Assert position
|
||||||
BOL = 0x50,
|
BOL,
|
||||||
EOL,
|
EOL,
|
||||||
// Instructions which take relative offset as arg
|
// Instructions which take relative offset as arg
|
||||||
JMP = 0x60,
|
JMP,
|
||||||
SPLIT,
|
SPLIT,
|
||||||
RSPLIT,
|
RSPLIT,
|
||||||
// Other (special) instructions
|
// Other (special) instructions
|
||||||
SAVE = 0x7e,
|
SAVE,
|
||||||
MATCH = 0x7f,
|
MATCH,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Return codes for re_sizecode() and re_comp()
|
// Return codes for re_sizecode() and re_comp()
|
||||||
@@ -497,40 +497,18 @@ int re_comp(rcode *prog, const char *re, int anchored)
|
|||||||
return RE_SUCCESS;
|
return RE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
void cleanmarks(rcode *prog)
|
static void addthread(const int *pbeg, int *plist, int gen, rthreadlist *l,
|
||||||
{
|
int *pc, rsub *sub, const char *beg, const char *sp)
|
||||||
int *pc = prog->insts;
|
|
||||||
int *end = pc + prog->unilen;
|
|
||||||
while (pc < end) {
|
|
||||||
*pc &= 0x7f;
|
|
||||||
switch (*pc) {
|
|
||||||
case CLASS:
|
|
||||||
case CLASSNOT:
|
|
||||||
pc += pc[1] * 2;
|
|
||||||
case NAMEDCLASS:
|
|
||||||
case JMP:
|
|
||||||
case SPLIT:
|
|
||||||
case RSPLIT:
|
|
||||||
case SAVE:
|
|
||||||
case CHAR:
|
|
||||||
pc++;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
pc++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void addthread(rthreadlist *l, int *pc, rsub *sub, const char *beg, const char *sp)
|
|
||||||
{
|
{
|
||||||
int off;
|
int off;
|
||||||
rec:
|
rec:
|
||||||
if(*pc & 0x80) {
|
if(plist[pc - pbeg] == gen) {
|
||||||
decref(sub);
|
decref(sub);
|
||||||
return; // already on list
|
return; // already on list
|
||||||
}
|
}
|
||||||
*pc |= 0x80;
|
plist[pc - pbeg] = gen;
|
||||||
|
|
||||||
switch(*pc & 0x7f) {
|
switch(*pc) {
|
||||||
default:
|
default:
|
||||||
l->t[l->n].sub = sub;
|
l->t[l->n].sub = sub;
|
||||||
l->t[l->n++].pc = pc;
|
l->t[l->n++].pc = pc;
|
||||||
@@ -542,14 +520,14 @@ static void addthread(rthreadlist *l, int *pc, rsub *sub, const char *beg, const
|
|||||||
case SPLIT:
|
case SPLIT:
|
||||||
off = pc[1];
|
off = pc[1];
|
||||||
sub->ref++;
|
sub->ref++;
|
||||||
addthread(l, pc+2, sub, beg, sp);
|
addthread(pbeg, plist, gen, l, pc+2, sub, beg, sp);
|
||||||
pc += 2 + off;
|
pc += 2 + off;
|
||||||
goto rec;
|
goto rec;
|
||||||
case RSPLIT:
|
case RSPLIT:
|
||||||
off = pc[1];
|
off = pc[1];
|
||||||
pc += 2;
|
pc += 2;
|
||||||
sub->ref++;
|
sub->ref++;
|
||||||
addthread(l, pc + off, sub, beg, sp);
|
addthread(pbeg, plist, gen, l, pc + off, sub, beg, sp);
|
||||||
goto rec;
|
goto rec;
|
||||||
case SAVE:
|
case SAVE:
|
||||||
off = pc[1];
|
off = pc[1];
|
||||||
@@ -569,12 +547,14 @@ static void addthread(rthreadlist *l, int *pc, rsub *sub, const char *beg, const
|
|||||||
|
|
||||||
int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
|
int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
|
||||||
{
|
{
|
||||||
int i, *pc;
|
int i, gen, *pc;
|
||||||
const char *sp;
|
const char *sp;
|
||||||
|
int plist[prog->unilen];
|
||||||
rsub *sub, *matched = NULL;
|
rsub *sub, *matched = NULL;
|
||||||
rthreadlist _clist[1+prog->len];
|
rthreadlist _clist[1+prog->len];
|
||||||
rthreadlist _nlist[1+prog->len];
|
rthreadlist _nlist[1+prog->len];
|
||||||
rthreadlist *clist = _clist, *nlist = _nlist, *tmp;
|
rthreadlist *clist = _clist, *nlist = _nlist, *tmp;
|
||||||
|
memset(plist, 0, prog->unilen*sizeof(plist[0]));
|
||||||
memset(clist, 0, (1+prog->len)*sizeof(rthread));
|
memset(clist, 0, (1+prog->len)*sizeof(rthread));
|
||||||
memset(nlist, 0, (1+prog->len)*sizeof(rthread));
|
memset(nlist, 0, (1+prog->len)*sizeof(rthread));
|
||||||
|
|
||||||
@@ -586,22 +566,22 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
|
|||||||
for(i=0; i<nsubp; i++)
|
for(i=0; i<nsubp; i++)
|
||||||
sub->sub[i] = NULL;
|
sub->sub[i] = NULL;
|
||||||
|
|
||||||
cleanmarks(prog);
|
gen = 1;
|
||||||
addthread(clist, prog->insts, sub, s, s);
|
addthread(prog->insts, plist, gen, clist, prog->insts, sub, s, s);
|
||||||
for(sp=s;; sp++) {
|
for(sp=s;; sp++) {
|
||||||
if(clist->n == 0)
|
if(clist->n == 0)
|
||||||
break;
|
break;
|
||||||
cleanmarks(prog);
|
gen++;
|
||||||
for(i=0; i<clist->n; i++) {
|
for(i=0; i<clist->n; i++) {
|
||||||
pc = clist->t[i].pc;
|
pc = clist->t[i].pc;
|
||||||
sub = clist->t[i].sub;
|
sub = clist->t[i].sub;
|
||||||
if (inst_is_consumer(*pc & 0x7f) && !*sp) {
|
if (inst_is_consumer(*pc) && !*sp) {
|
||||||
// If we need to match a character, but there's none left,
|
// If we need to match a character, but there's none left,
|
||||||
// it's fail (we don't schedule current thread for continuation)
|
// it's fail (we don't schedule current thread for continuation)
|
||||||
decref(sub);
|
decref(sub);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
switch(*pc++ & 0x7f) {
|
switch(*pc++) {
|
||||||
case CHAR:
|
case CHAR:
|
||||||
if(*sp != *pc++) {
|
if(*sp != *pc++) {
|
||||||
decref(sub);
|
decref(sub);
|
||||||
@@ -609,7 +589,7 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
|
|||||||
}
|
}
|
||||||
case ANY:
|
case ANY:
|
||||||
addthread:
|
addthread:
|
||||||
addthread(nlist, pc, sub, s, sp+1);
|
addthread(prog->insts, plist, gen, nlist, pc, sub, s, sp+1);
|
||||||
break;
|
break;
|
||||||
case CLASS:
|
case CLASS:
|
||||||
case CLASSNOT:
|
case CLASSNOT:
|
||||||
@@ -670,7 +650,7 @@ int main(int argc, char *argv[])
|
|||||||
if(sub[k-1])
|
if(sub[k-1])
|
||||||
break;
|
break;
|
||||||
for(int l=0; l<sub_els; l+=2) {
|
for(int l=0; l<sub_els; l+=2) {
|
||||||
printf(" (");
|
printf("(");
|
||||||
if(sub[l] == NULL)
|
if(sub[l] == NULL)
|
||||||
printf("?");
|
printf("?");
|
||||||
else
|
else
|
||||||
|
|||||||
21
test.sh
Executable file
21
test.sh
Executable file
@@ -0,0 +1,21 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
regex="abc cde (a|b)|c"
|
||||||
|
input="abcdef abcdef abc"
|
||||||
|
expect="(0,3) (2,5) (0,1)(0,1)"
|
||||||
|
|
||||||
|
c=1
|
||||||
|
echo "$regex" | tr ' ' '\n' | while read re; do
|
||||||
|
inp=$(echo $input | awk -v c=$c '{print $c}')
|
||||||
|
exp=$(echo $expect | awk -v c=$c '{print $c}')
|
||||||
|
var=$(echo $(./a.out "$re" "$inp" | awk 'END{print}'))
|
||||||
|
if [ ! "$exp" = "$var" ]; then
|
||||||
|
echo "fail test$c regex:$re input:$inp expect:$exp output:$var"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
c=$((c+1))
|
||||||
|
done
|
||||||
|
|
||||||
Reference in New Issue
Block a user