get rid of all globals, inline/optimize
This commit is contained in:
233
pike.c
233
pike.c
@@ -70,6 +70,7 @@ struct rcode
|
|||||||
int unilen;
|
int unilen;
|
||||||
int len;
|
int len;
|
||||||
int sub;
|
int sub;
|
||||||
|
int splits;
|
||||||
int insts[];
|
int insts[];
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -105,7 +106,6 @@ typedef struct rsub rsub;
|
|||||||
struct rsub
|
struct rsub
|
||||||
{
|
{
|
||||||
int ref;
|
int ref;
|
||||||
int nsub;
|
|
||||||
const char *sub[128];
|
const char *sub[128];
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -137,46 +137,6 @@ void re_fatal(char *msg)
|
|||||||
exit(2);
|
exit(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
static rsub *freesub;
|
|
||||||
static rsub subs[10];
|
|
||||||
static int subidx;
|
|
||||||
|
|
||||||
rsub* newsub(int n)
|
|
||||||
{
|
|
||||||
rsub *s = freesub;
|
|
||||||
if(s != NULL)
|
|
||||||
freesub = (rsub*)s->sub[0];
|
|
||||||
else
|
|
||||||
s = &subs[subidx++];
|
|
||||||
s->nsub = n;
|
|
||||||
s->ref = 1;
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
rsub* update(rsub *s, int i, const char *p)
|
|
||||||
{
|
|
||||||
rsub *s1;
|
|
||||||
int j;
|
|
||||||
|
|
||||||
if(s->ref > 1) {
|
|
||||||
s1 = newsub(s->nsub);
|
|
||||||
for(j=0; j<s->nsub; j++)
|
|
||||||
s1->sub[j] = s->sub[j];
|
|
||||||
s->ref--;
|
|
||||||
s = s1;
|
|
||||||
}
|
|
||||||
s->sub[i] = p;
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void decref(rsub *s)
|
|
||||||
{
|
|
||||||
if(--s->ref == 0) {
|
|
||||||
s->sub[0] = (char*)freesub;
|
|
||||||
freesub = s;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int re_classmatch(const int *pc, const char *sp)
|
int re_classmatch(const int *pc, const char *sp)
|
||||||
{
|
{
|
||||||
// pc points to "classnot" byte after opcode
|
// pc points to "classnot" byte after opcode
|
||||||
@@ -382,6 +342,7 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
|
|||||||
split = *(re+1) == '[' ? RSPLIT : SPLIT;
|
split = *(re+1) == '[' ? RSPLIT : SPLIT;
|
||||||
for (i = maxcnt-mincnt; i > 0; i--)
|
for (i = maxcnt-mincnt; i > 0; i--)
|
||||||
{
|
{
|
||||||
|
prog->splits++;
|
||||||
EMIT(PC++, split);
|
EMIT(PC++, split);
|
||||||
EMIT(PC++, REL(PC, PC+((size+2)*i)));
|
EMIT(PC++, REL(PC, PC+((size+2)*i)));
|
||||||
if (code)
|
if (code)
|
||||||
@@ -414,6 +375,7 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
|
|||||||
}
|
}
|
||||||
EMIT(term + 1, REL(term, PC));
|
EMIT(term + 1, REL(term, PC));
|
||||||
prog->len++;
|
prog->len++;
|
||||||
|
prog->splits++;
|
||||||
term = PC;
|
term = PC;
|
||||||
break;
|
break;
|
||||||
case '*':
|
case '*':
|
||||||
@@ -429,6 +391,7 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
|
|||||||
EMIT(term, SPLIT);
|
EMIT(term, SPLIT);
|
||||||
}
|
}
|
||||||
EMIT(term + 1, REL(term, PC));
|
EMIT(term + 1, REL(term, PC));
|
||||||
|
prog->splits++;
|
||||||
prog->len += 2;
|
prog->len += 2;
|
||||||
term = PC;
|
term = PC;
|
||||||
break;
|
break;
|
||||||
@@ -442,6 +405,7 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
|
|||||||
}
|
}
|
||||||
EMIT(PC + 1, REL(PC, term));
|
EMIT(PC + 1, REL(PC, term));
|
||||||
PC += 2;
|
PC += 2;
|
||||||
|
prog->splits++;
|
||||||
prog->len++;
|
prog->len++;
|
||||||
term = PC;
|
term = PC;
|
||||||
break;
|
break;
|
||||||
@@ -454,6 +418,7 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
|
|||||||
alt_label = PC++;
|
alt_label = PC++;
|
||||||
EMIT(start, SPLIT);
|
EMIT(start, SPLIT);
|
||||||
EMIT(start + 1, REL(start, PC));
|
EMIT(start + 1, REL(start, PC));
|
||||||
|
prog->splits++;
|
||||||
prog->len += 2;
|
prog->len += 2;
|
||||||
term = PC;
|
term = PC;
|
||||||
break;
|
break;
|
||||||
@@ -502,6 +467,7 @@ int re_comp(rcode *prog, const char *re, int anchored)
|
|||||||
prog->len = 0;
|
prog->len = 0;
|
||||||
prog->unilen = 0;
|
prog->unilen = 0;
|
||||||
prog->sub = 0;
|
prog->sub = 0;
|
||||||
|
prog->splits = 0;
|
||||||
|
|
||||||
// Add code to implement non-anchored operation ("search").
|
// Add code to implement non-anchored operation ("search").
|
||||||
// For anchored operation ("match"), this code will be just skipped.
|
// For anchored operation ("match"), this code will be just skipped.
|
||||||
@@ -516,6 +482,7 @@ int re_comp(rcode *prog, const char *re, int anchored)
|
|||||||
prog->insts[prog->unilen++] = SAVE;
|
prog->insts[prog->unilen++] = SAVE;
|
||||||
prog->insts[prog->unilen++] = 0;
|
prog->insts[prog->unilen++] = 0;
|
||||||
prog->len += 4;
|
prog->len += 4;
|
||||||
|
prog->splits++;
|
||||||
}
|
}
|
||||||
int res = _compilecode(&re, prog, /*sizecode*/0);
|
int res = _compilecode(&re, prog, /*sizecode*/0);
|
||||||
if (res < 0) return res;
|
if (res < 0) return res;
|
||||||
@@ -524,136 +491,141 @@ int re_comp(rcode *prog, const char *re, int anchored)
|
|||||||
|
|
||||||
prog->insts[prog->unilen++] = SAVE;
|
prog->insts[prog->unilen++] = SAVE;
|
||||||
prog->insts[prog->unilen++] = 1;
|
prog->insts[prog->unilen++] = 1;
|
||||||
|
|
||||||
prog->insts[prog->unilen++] = MATCH;
|
prog->insts[prog->unilen++] = MATCH;
|
||||||
prog->len += 2;
|
prog->len += 2;
|
||||||
|
|
||||||
return RE_SUCCESS;
|
return RE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void addthread(const int *pbeg, int *plist, int gen, rthreadlist *l,
|
#define addthread(nn, list, _pc, _sub, _sp, cont) \
|
||||||
int *pc, rsub *sub, const char *beg, const char *sp)
|
{ \
|
||||||
{
|
int i = 0, j, *pc = _pc; \
|
||||||
int i = 0, *pcs[10];
|
rsub *s1, *sub = _sub; \
|
||||||
rsub *subs[10];
|
rec##nn: \
|
||||||
rec:
|
if(plist[pc - prog->insts] == gen) { \
|
||||||
if(plist[pc - pbeg] == gen) {
|
sub->ref--; \
|
||||||
decref(sub);
|
rec_check##nn: \
|
||||||
rec_check:
|
if (i) { \
|
||||||
if (i) {
|
pc = pcs[--i]; \
|
||||||
pc = pcs[--i];
|
sub = subs[i]; \
|
||||||
sub = subs[i];
|
goto rec##nn; \
|
||||||
goto rec;
|
} \
|
||||||
}
|
cont; \
|
||||||
return; // already on list
|
} \
|
||||||
}
|
plist[pc - prog->insts] = gen; \
|
||||||
plist[pc - pbeg] = gen;
|
switch(*pc) { \
|
||||||
|
default: \
|
||||||
switch(*pc) {
|
list->t[list->n].sub = sub; \
|
||||||
default:
|
list->t[list->n++].pc = pc; \
|
||||||
l->t[l->n].sub = sub;
|
goto rec_check##nn; \
|
||||||
l->t[l->n++].pc = pc;
|
case JMP: \
|
||||||
goto rec_check;
|
pc += 2 + pc[1]; \
|
||||||
case JMP:
|
goto rec##nn; \
|
||||||
pc += 2 + pc[1];
|
case SPLIT: \
|
||||||
goto rec;
|
subs[i] = sub; \
|
||||||
case SPLIT:
|
sub->ref++; \
|
||||||
subs[i] = sub;
|
pc += 2; \
|
||||||
sub->ref++;
|
pcs[i++] = pc + pc[-1]; \
|
||||||
pc += 2;
|
goto rec##nn; \
|
||||||
pcs[i++] = pc + pc[-1];
|
case RSPLIT: \
|
||||||
goto rec;
|
subs[i] = sub; \
|
||||||
case RSPLIT:
|
sub->ref++; \
|
||||||
subs[i] = sub;
|
pc += 2; \
|
||||||
sub->ref++;
|
pcs[i++] = pc; \
|
||||||
pc += 2;
|
pc += pc[-1]; \
|
||||||
pcs[i++] = pc;
|
goto rec##nn; \
|
||||||
pc += pc[-1];
|
case SAVE: \
|
||||||
goto rec;
|
if (sub->ref > 1) { \
|
||||||
case SAVE:
|
for (j = 0; j < subidx; j++) { \
|
||||||
sub = update(sub, pc[1], sp);
|
if (nsubs[j].ref <= 0) { \
|
||||||
pc += 2;
|
s1 = &nsubs[j]; \
|
||||||
goto rec;
|
goto freedsub##nn; \
|
||||||
case BOL:
|
} \
|
||||||
if(sp != beg)
|
} \
|
||||||
goto rec_check;
|
s1 = &nsubs[subidx++]; \
|
||||||
pc++; goto rec;
|
freedsub##nn: \
|
||||||
case EOL:
|
for (j = 0; j < nsubp; j++) \
|
||||||
if(*sp)
|
s1->sub[j] = sub->sub[j]; \
|
||||||
goto rec_check;
|
sub = s1; \
|
||||||
pc++; goto rec;
|
sub->ref = 1; \
|
||||||
}
|
} \
|
||||||
}
|
sub->sub[pc[1]] = _sp; \
|
||||||
|
pc += 2; \
|
||||||
|
goto rec##nn; \
|
||||||
|
case BOL: \
|
||||||
|
if(_sp != s) \
|
||||||
|
goto rec_check##nn; \
|
||||||
|
pc++; goto rec##nn; \
|
||||||
|
case EOL: \
|
||||||
|
if(*(_sp)) \
|
||||||
|
goto rec_check##nn; \
|
||||||
|
pc++; goto rec##nn; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
|
||||||
int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
|
int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
|
||||||
{
|
{
|
||||||
int i, c, l, gen, *pc;
|
int i, c, l, *npc, gen = 1, subidx = 1;
|
||||||
const char *sp;
|
const char *sp;
|
||||||
|
rsub nsubs[256];
|
||||||
int plist[prog->unilen];
|
int plist[prog->unilen];
|
||||||
rsub *sub, *matched = NULL;
|
int *pcs[prog->splits];
|
||||||
|
rsub *subs[prog->splits];
|
||||||
|
rsub *nsub = nsubs, *matched = NULL;
|
||||||
rthreadlist _clist[1+prog->len];
|
rthreadlist _clist[1+prog->len];
|
||||||
rthreadlist _nlist[1+prog->len];
|
rthreadlist _nlist[1+prog->len];
|
||||||
rthreadlist *clist = _clist, *nlist = _nlist, *tmp;
|
rthreadlist *clist = _clist, *nlist = _nlist, *tmp;
|
||||||
memset(plist, 0, prog->unilen*sizeof(plist[0]));
|
memset(plist, 0, prog->unilen*sizeof(plist[0]));
|
||||||
memset(clist, 0, (1+prog->len)*sizeof(rthread));
|
memset(clist, 0, (1+prog->len)*sizeof(rthread));
|
||||||
memset(nlist, 0, (1+prog->len)*sizeof(rthread));
|
memset(nlist, 0, (1+prog->len)*sizeof(rthread));
|
||||||
|
nsub->ref = 1;
|
||||||
|
|
||||||
subidx = 0;
|
for(i=0; i<nsubp; i++) {
|
||||||
freesub = NULL;
|
|
||||||
for(i=0; i<nsubp; i++)
|
|
||||||
subp[i] = NULL;
|
subp[i] = NULL;
|
||||||
sub = newsub(nsubp);
|
nsub->sub[i] = NULL;
|
||||||
for(i=0; i<nsubp; i++)
|
}
|
||||||
sub->sub[i] = NULL;
|
|
||||||
|
|
||||||
gen = 1;
|
gen = 1;
|
||||||
addthread(prog->insts, plist, gen, clist, prog->insts, sub, s, s);
|
while (1)
|
||||||
|
addthread(1, clist, prog->insts, nsub, s, break)
|
||||||
for(sp=s;; sp += l) {
|
for(sp=s;; sp += l) {
|
||||||
if(clist->n == 0)
|
if(clist->n == 0)
|
||||||
break;
|
break;
|
||||||
gen++; uc_len(l, s)
|
gen++; uc_len(l, s)
|
||||||
for(i=0; i<clist->n; i++) {
|
for(i=0; i<clist->n; i++) {
|
||||||
pc = clist->t[i].pc;
|
npc = clist->t[i].pc;
|
||||||
sub = clist->t[i].sub;
|
nsub = clist->t[i].sub;
|
||||||
if (inst_is_consumer(*pc) && !*sp) {
|
if (inst_is_consumer(*npc) && !*sp) {
|
||||||
// If we need to match a character, but there's none left,
|
// If we need to match a character, but there's none left,
|
||||||
// it's fail (we don't schedule current thread for continuation)
|
// it's fail (we don't schedule current thread for continuation)
|
||||||
decref(sub);
|
nsub->ref--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
switch(*pc++) {
|
switch(*npc++) {
|
||||||
case CHAR:
|
case CHAR:
|
||||||
uc_code(c, sp)
|
uc_code(c, sp)
|
||||||
if(c != *pc++) {
|
if(c != *npc++)
|
||||||
decref(sub);
|
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
case ANY:
|
case ANY:
|
||||||
addthread:
|
addthread:
|
||||||
addthread(prog->insts, plist, gen, nlist, pc, sub, s, sp+l);
|
addthread(2, nlist, npc, nsub, sp+l, continue)
|
||||||
break;
|
|
||||||
case CLASS:
|
case CLASS:
|
||||||
if (!re_classmatch(pc, sp)) {
|
if (!re_classmatch(npc, sp))
|
||||||
decref(sub);
|
|
||||||
break;
|
break;
|
||||||
}
|
npc += *(npc+1) * 2 + 2;
|
||||||
pc += *(pc+1) * 2 + 2;
|
|
||||||
goto addthread;
|
goto addthread;
|
||||||
case NAMEDCLASS:
|
case NAMEDCLASS:
|
||||||
if (!re_namedclassmatch(pc, sp)) {
|
if (!re_namedclassmatch(npc, sp))
|
||||||
decref(sub);
|
|
||||||
break;
|
break;
|
||||||
}
|
npc++;
|
||||||
pc++;
|
|
||||||
goto addthread;
|
goto addthread;
|
||||||
case MATCH:
|
case MATCH:
|
||||||
if(matched)
|
matched = nsub;
|
||||||
decref(matched);
|
|
||||||
matched = sub;
|
|
||||||
for(i++; i < clist->n; i++)
|
for(i++; i < clist->n; i++)
|
||||||
decref(clist->t[i].sub);
|
clist->t[i].sub->ref--;
|
||||||
goto BreakFor;
|
goto BreakFor;
|
||||||
}
|
}
|
||||||
|
nsub->ref--;
|
||||||
}
|
}
|
||||||
BreakFor:
|
BreakFor:
|
||||||
tmp = clist;
|
tmp = clist;
|
||||||
@@ -664,7 +636,6 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
|
|||||||
if(matched) {
|
if(matched) {
|
||||||
for(i=0; i<nsubp; i++)
|
for(i=0; i<nsubp; i++)
|
||||||
subp[i] = matched->sub[i];
|
subp[i] = matched->sub[i];
|
||||||
decref(matched);
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
@@ -679,21 +650,24 @@ int main(int argc, char *argv[])
|
|||||||
int sz = re_sizecode(argv[1]) * sizeof(int);
|
int sz = re_sizecode(argv[1]) * sizeof(int);
|
||||||
printf("Precalculated size: %d\n", sz);
|
printf("Precalculated size: %d\n", sz);
|
||||||
char code[sizeof(rcode)+sz];
|
char code[sizeof(rcode)+sz];
|
||||||
rcode *_code = (rcode*)&code;
|
rcode *_code = (rcode*)code;
|
||||||
if (re_comp(_code, argv[1], 0))
|
if (re_comp(_code, argv[1], 0))
|
||||||
re_fatal("Error in re_comp");
|
re_fatal("Error in re_comp");
|
||||||
re_dumpcode(_code);
|
re_dumpcode(_code);
|
||||||
|
#include <time.h>
|
||||||
if (argc > 2) {
|
if (argc > 2) {
|
||||||
int sub_els = (_code->sub + 1) * 2;
|
int sub_els = (_code->sub + 1) * 2;
|
||||||
const char *sub[sub_els];
|
const char *sub[sub_els];
|
||||||
for (int i = 2; i < argc; i++) {
|
for (int i = 2; i < argc; i++) {
|
||||||
printf("sub depth %d\n", subidx);
|
|
||||||
printf("input bytelen: %d\n", strlen(argv[i]));
|
printf("input bytelen: %d\n", strlen(argv[i]));
|
||||||
|
clock_t start_time = clock();
|
||||||
if(!re_pikevm(_code, argv[i], sub, sub_els))
|
if(!re_pikevm(_code, argv[i], sub, sub_els))
|
||||||
{ printf("-nomatch-\n"); continue; }
|
{ printf("-nomatch-\n"); continue; }
|
||||||
for(int k=sub_els; k>0; k--)
|
for(int k=sub_els; k>0; k--)
|
||||||
if(sub[k-1])
|
if(sub[k-1])
|
||||||
break;
|
break;
|
||||||
|
double elapsed_time = (double)(clock() - start_time) / CLOCKS_PER_SEC;
|
||||||
|
printf("Done in %f seconds\n", elapsed_time);
|
||||||
for(int l=0; l<sub_els; l+=2) {
|
for(int l=0; l<sub_els; l+=2) {
|
||||||
printf("(");
|
printf("(");
|
||||||
if(sub[l] == NULL)
|
if(sub[l] == NULL)
|
||||||
@@ -709,7 +683,6 @@ int main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
16
test.sh
16
test.sh
@@ -46,6 +46,8 @@ b[^c]*
|
|||||||
([^abc])|(a+)
|
([^abc])|(a+)
|
||||||
[a-g]+
|
[a-g]+
|
||||||
[а-г]+
|
[а-г]+
|
||||||
|
called|chief|dust|familiar|forth|waif|campaign|divers|smile|notice|kill|human|stands|nightshade|dollar|doughty|gloaming|twist|July|officers|wrest|coop|one|ability|welcome|significance|writer|spring|it's|helped|set|Paris|from|coomb|stay|hummock|taken|anon|makes|boat|nearly|am|justice|further|expression|contemporary|sooth|order|about|question|lived|apply|educational|of|night|satisfy|opened|never|success|until|visit|promise|parts|beneath|matter|typical|bade|apartment|rapidly|primary|bring|throat|hold|laws|understand|trade|desire|material|evidence|another|often|plash|model|someone|bond|hell|relationship|probably|exercise|performance|wants|known|countries|gammer|leeward|took|itself|representative|objection|aircraft
|
||||||
|
abc+h+d+f
|
||||||
"
|
"
|
||||||
input="\
|
input="\
|
||||||
abcdef
|
abcdef
|
||||||
@@ -93,6 +95,8 @@ abc
|
|||||||
aaaa
|
aaaa
|
||||||
aaaabcdefghij
|
aaaabcdefghij
|
||||||
ааааабвг...
|
ааааабвг...
|
||||||
|
hhfd h23 performance
|
||||||
|
abcccccccccccchdf
|
||||||
"
|
"
|
||||||
expect="\
|
expect="\
|
||||||
(0,3)
|
(0,3)
|
||||||
@@ -140,18 +144,22 @@ expect="\
|
|||||||
(0,4)(?,?)(0,4)
|
(0,4)(?,?)(0,4)
|
||||||
(0,10)
|
(0,10)
|
||||||
(0,16)
|
(0,16)
|
||||||
|
(10,21)
|
||||||
|
(0,17)
|
||||||
(0,0)
|
(0,0)
|
||||||
"
|
"
|
||||||
c=1
|
c=1
|
||||||
echo "$regex" | tr '\n' | while read re; do
|
echo "$regex" | tr '\n' | while read re; do
|
||||||
inp=$(echo "$input" | awk -v c=$c 'BEGIN{ RS = "" ; FS = "\n" }{print $c}')
|
inp=$(echo "$input" | awk -v c=$c 'BEGIN{ RS = "" ; FS = "\n" }{print $c}')
|
||||||
exp=$(echo "$expect" | awk -v c=$c 'BEGIN{ RS = "" ; FS = "\n" }{print $c}')
|
exp=$(echo "$expect" | awk -v c=$c 'BEGIN{ RS = "" ; FS = "\n" }{print $c}')
|
||||||
var=$(echo $(./a.out "$re" "$inp" | awk 'END{print}'))
|
var=$(./a.out "$re" "$inp")
|
||||||
if [ ! "$exp" = "$var" ]; then
|
var1=$(echo "$var" | tail -1)
|
||||||
echo "fail test$c regex:$re input:$inp expect:$exp output:$var"
|
if [ ! "$exp" = "$var1" ]; then
|
||||||
|
echo "fail test$c regex:$re input:$inp expect:$exp output:$var1"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
echo "pass test$c regex:$re input:$inp expect:$exp output:$var"
|
time=$(echo "$var" | tail -2 | head -n1)
|
||||||
|
echo "pass test$c regex:$re input:$inp expect:$exp output:$var1 $time"
|
||||||
c=$((c+1))
|
c=$((c+1))
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user