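better plist: keep each SPLIT/RSPLIT generation mark in a third instruction slot instead of a separate plist array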

This commit is contained in:
Kyryl Melekhin
2021-09-05 11:48:11 +00:00
parent 8af2598a44
commit 05d24c95d2

119
pike.c
View File

@@ -134,12 +134,12 @@ void re_dumpcode(rcode *prog)
pc = prog->unilen; pc = prog->unilen;
break; break;
case SPLIT: case SPLIT:
printf("split %d (%d)\n", pc + code[pc] + 1, code[pc]); printf("split %d (%d)\n", pc + code[pc] + 2, code[pc]);
pc++; pc+=2;
break; break;
case RSPLIT: case RSPLIT:
printf("rsplit %d (%d)\n", pc + code[pc] + 1, code[pc]); printf("rsplit %d (%d)\n", pc + code[pc] + 2, code[pc]);
pc++; pc+=2;
break; break;
case JMP: case JMP:
printf("jmp %d (%d)\n", pc + code[pc] + 1, code[pc]); printf("jmp %d (%d)\n", pc + code[pc] + 1, code[pc]);
@@ -291,7 +291,8 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
{ {
prog->splits++; prog->splits++;
EMIT(PC++, SPLIT); EMIT(PC++, SPLIT);
EMIT(PC++, REL(PC, PC+((size+2)*i))); EMIT(PC++, REL(PC-1, PC+((size+3)*i)));
EMIT(PC++, 0);
if (code) if (code)
memcpy(&code[PC], &code[term], size*sizeof(int)); memcpy(&code[PC], &code[term], size*sizeof(int));
PC += size; PC += size;
@@ -314,20 +315,21 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
break; break;
case '?': case '?':
if (PC == term) goto syntax_error; if (PC == term) goto syntax_error;
INSERT_CODE(term, 2, PC); INSERT_CODE(term, 3, PC);
if (re[1] == '?') { if (re[1] == '?') {
EMIT(term, RSPLIT); EMIT(term, RSPLIT);
re++; re++;
} else } else
EMIT(term, SPLIT); EMIT(term, SPLIT);
EMIT(term + 1, REL(term, PC)); EMIT(term + 1, REL(term, PC-1));
EMIT(term + 2, 0);
prog->len++; prog->len++;
prog->splits++; prog->splits++;
term = PC; term = PC;
break; break;
case '*': case '*':
if (PC == term) goto syntax_error; if (PC == term) goto syntax_error;
INSERT_CODE(term, 2, PC); INSERT_CODE(term, 3, PC);
EMIT(PC, JMP); EMIT(PC, JMP);
EMIT(PC + 1, REL(PC, term)); EMIT(PC + 1, REL(PC, term));
PC += 2; PC += 2;
@@ -336,7 +338,8 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
re++; re++;
} else } else
EMIT(term, SPLIT); EMIT(term, SPLIT);
EMIT(term + 1, REL(term, PC)); EMIT(term + 1, REL(term, PC-1));
EMIT(term + 2, 0);
prog->splits++; prog->splits++;
prog->len += 2; prog->len += 2;
term = PC; term = PC;
@@ -348,8 +351,9 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
re++; re++;
} else } else
EMIT(PC, RSPLIT); EMIT(PC, RSPLIT);
EMIT(PC + 1, REL(PC, term)); EMIT(PC + 1, REL(PC-1, term));
PC += 2; EMIT(PC + 2, 0);
PC += 3;
prog->splits++; prog->splits++;
prog->len++; prog->len++;
term = PC; term = PC;
@@ -357,11 +361,12 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
case '|': case '|':
if (alt_label) if (alt_label)
EMIT(alt_label, REL(alt_label, PC) + 1); EMIT(alt_label, REL(alt_label, PC) + 1);
INSERT_CODE(start, 2, PC); INSERT_CODE(start, 3, PC);
EMIT(PC++, JMP); EMIT(PC++, JMP);
alt_label = PC++; alt_label = PC++;
EMIT(start, SPLIT); EMIT(start, SPLIT);
EMIT(start + 1, REL(start, PC)); EMIT(start + 1, REL(start, PC-1));
EMIT(start + 2, 0);
prog->splits++; prog->splits++;
prog->len += 2; prog->len += 2;
term = PC; term = PC;
@@ -441,93 +446,91 @@ if (--csub->ref == 0) { \
} \ } \
#define deccheck(nn) \ #define deccheck(nn) \
{ decref(sub) goto rec_check##nn; } \ { decref(nsub) goto rec_check##nn; } \
#define fastrec(nn, list, listidx) \ #define fastrec(nn, list, listidx) \
if (*pc < WBEG) { \ nsub->ref++; \
list[listidx].sub = sub; \ if (*npc < WBEG) { \
list[listidx++].pc = pc; \ list[listidx].sub = nsub; \
pc = pcs[i]; \ list[listidx++].pc = npc; \
npc = pcs[i]; \
goto rec##nn; \ goto rec##nn; \
} \ } \
subs[i++] = sub; \ subs[i++] = nsub; \
goto next##nn; \ goto next##nn; \
#define saveclist() \ #define saveclist() \
newsub(for (j = 0; j < nsubp; j++) s1->sub[j] = sub->sub[j];, \ newsub(for (j = 0; j < nsubp; j++) s1->sub[j] = nsub->sub[j];, \
for (j = 0; j < nsubp / 2 - 1; j++) s1->sub[j] = sub->sub[j];) \ for (j = 0; j < nsubp / 2 - 1; j++) s1->sub[j] = nsub->sub[j];) \
#define savenlist() \ #define savenlist() \
newsub(/*nop*/, /*nop*/) \ newsub(/*nop*/, /*nop*/) \
for (j = 0; j < nsubp; j++) s1->sub[j] = sub->sub[j]; \ for (j = 0; j < nsubp; j++) s1->sub[j] = nsub->sub[j]; \
#define addthread(nn, list, listidx, _pc, _sub) \ #define addthread(nn, list, listidx) \
{ \ { \
int i = 0, *pc = _pc; \ int i = 0; \
rsub *sub = _sub; \
rec##nn: \ rec##nn: \
if (*pc < WBEG) { \ if (*npc < WBEG) { \
list[listidx].sub = sub; \ list[listidx].sub = nsub; \
list[listidx++].pc = pc; \ list[listidx++].pc = npc; \
rec_check##nn: \ rec_check##nn: \
if (i) { \ if (i) { \
pc = pcs[--i]; \ npc = pcs[--i]; \
sub = subs[i]; \ nsub = subs[i]; \
goto rec##nn; \ goto rec##nn; \
} \ } \
continue; \ continue; \
} \ } \
next##nn: \ next##nn: \
switch(*pc) { \ switch(*npc) { \
case JMP: \ case JMP: \
pc += 2 + pc[1]; \ npc += 2 + npc[1]; \
goto rec##nn; \ goto rec##nn; \
case SPLIT: \ case SPLIT: \
if (plist[pc - insts] == gen) \ if (npc[2] == gen) \
deccheck(nn) \ deccheck(nn) \
plist[pc - insts] = gen; \ npc[2] = gen; \
sub->ref++; \ npc += 3; \
pc += 2; \ pcs[i] = npc + npc[-2]; \
pcs[i] = pc + pc[-1]; \
fastrec(nn, list, listidx) \ fastrec(nn, list, listidx) \
case RSPLIT: \ case RSPLIT: \
if (plist[pc - insts] == gen) \ if (npc[2] == gen) \
deccheck(nn) \ deccheck(nn) \
plist[pc - insts] = gen; \ npc[2] = gen; \
sub->ref++; \ npc += 3; \
pc += 2; \ pcs[i] = npc; \
pcs[i] = pc; \ npc += npc[-2]; \
pc += pc[-1]; \
fastrec(nn, list, listidx) \ fastrec(nn, list, listidx) \
case SAVE: \ case SAVE: \
if (sub->ref > 1) { \ if (nsub->ref > 1) { \
sub->ref--; \ nsub->ref--; \
save##list() \ save##list() \
sub = s1; \ nsub = s1; \
sub->ref = 1; \ nsub->ref = 1; \
} \ } \
sub->sub[pc[1]] = _sp; \ nsub->sub[npc[1]] = _sp; \
pc += 2; \ npc += 2; \
goto rec##nn; \ goto rec##nn; \
case WBEG: \ case WBEG: \
if ((sp != s && isword(sp)) || !isword(_sp)) \ if ((sp != s && isword(sp)) || !isword(_sp)) \
deccheck(nn) \ deccheck(nn) \
pc++; goto rec##nn; \ npc++; goto rec##nn; \
case WEND: \ case WEND: \
if (isword(_sp)) \ if (isword(_sp)) \
deccheck(nn) \ deccheck(nn) \
pc++; goto rec##nn; \ npc++; goto rec##nn; \
case BOL: \ case BOL: \
if (_sp != s) { \ if (_sp != s) { \
if (!i && !listidx) \ if (!i && !listidx) \
_return(0) \ _return(0) \
deccheck(nn) \ deccheck(nn) \
} \ } \
pc++; goto rec##nn; \ npc++; goto rec##nn; \
case EOL: \ case EOL: \
if (*_sp) \ if (*_sp) \
deccheck(nn) \ deccheck(nn) \
pc++; goto rec##nn; \ npc++; goto rec##nn; \
} \ } \
} \ } \
@@ -537,7 +540,7 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
int rsubsize = sizeof(rsub)+(sizeof(char*)*nsubp); int rsubsize = sizeof(rsub)+(sizeof(char*)*nsubp);
int clistidx = 0, nlistidx = 0; int clistidx = 0, nlistidx = 0;
const char *sp = s, *_sp = s; const char *sp = s, *_sp = s;
int *insts = prog->insts, *plist = insts+prog->unilen; int *insts = prog->insts;
int *pcs[prog->splits]; int *pcs[prog->splits];
rsub *subs[prog->splits]; rsub *subs[prog->splits];
char nsubs[rsubsize * (prog->len+3 - prog->splits)]; char nsubs[rsubsize * (prog->len+3 - prog->splits)];
@@ -558,7 +561,7 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
break; break;
case ANY: case ANY:
addthread: addthread:
addthread(2, nlist, nlistidx, npc, nsub) addthread(2, nlist, nlistidx)
case CLASS: case CLASS:
if (!re_classmatch(npc, c)) if (!re_classmatch(npc, c))
break; break;
@@ -587,7 +590,8 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
newsub(for (i = 1; i < nsubp; i++) s1->sub[i] = NULL;, /*nop*/) newsub(for (i = 1; i < nsubp; i++) s1->sub[i] = NULL;, /*nop*/)
s1->ref = 1; s1->ref = 1;
s1->sub[0] = _sp; s1->sub[0] = _sp;
addthread(1, clist, clistidx, insts, s1) npc = insts; nsub = s1;
addthread(1, clist, clistidx)
} else if (!clistidx) } else if (!clistidx)
break; break;
} }
@@ -610,8 +614,7 @@ int main(int argc, char *argv[])
int sub_els; int sub_els;
int sz = re_sizecode(argv[1], &sub_els) * sizeof(int); int sz = re_sizecode(argv[1], &sub_els) * sizeof(int);
printf("Precalculated size: %d\n", sz); printf("Precalculated size: %d\n", sz);
char code[(sizeof(rcode)+sz)*2]; char code[sizeof(rcode)+sz];
memset(code+sizeof(rcode)+sz, 0, sizeof(rcode)+sz);
rcode *_code = (rcode*)code; rcode *_code = (rcode*)code;
if (re_comp(_code, argv[1], sub_els)) { if (re_comp(_code, argv[1], sub_els)) {
printf("Error in re_comp"); printf("Error in re_comp");