get rid of all globals, inline/optimize

2021-07-18 14:23:13 +00:00
parent 5a3bb5729b
commit c4caa646e5
2 changed files with 115 additions and 134 deletions
--- a/pike.c
+++ b/pike.c
@@ -70,6 +70,7 @@ struct rcode
 	int unilen;
 	int len;
 	int sub;
 	int splits;
 	int insts[];
 };
@@ -105,7 +106,6 @@ typedef struct rsub rsub;
 struct rsub
 {
 	int ref;
 	int nsub;
 	const char *sub[128];
 };
@@ -137,46 +137,6 @@ void re_fatal(char *msg)
 	exit(2);
 }
 static rsub *freesub;
 static rsub subs[10];
 static int subidx;
 /*
  * Hand out a capture record with its reference count set to 1.
  *
  * Records released through decref() are chained on the `freesub` list and
  * are reused first; only when that list is empty is a fresh slot taken from
  * the static `subs` pool.
  *
  * n: value stored in the record's nsub field (the number of sub[] entries
  *    that update() copies when it duplicates a record).
  *
  * Returns a record with nsub == n and ref == 1.  The sub[] entries are NOT
  * cleared here; the caller initialises the ones it uses.  If the pool has no
  * unused slot left the process is terminated instead of writing past the
  * end of `subs`.
  */
 rsub* newsub(int n)
 {
 	rsub *s = freesub;
 	if(s != NULL) {
 		/* a recycled record keeps the link to the next free one in sub[0] */
 		freesub = (rsub*)s->sub[0];
 	} else {
 		/* pool exhausted: indexing subs[subidx] would be out of bounds */
 		if(subidx < 0 || subidx >= (int)(sizeof subs / sizeof subs[0])) {
 			fprintf(stderr, "fatal error: out of rsub records\n");
 			exit(2);
 		}
 		s = &subs[subidx++];
 	}
 	s->nsub = n;
 	s->ref = 1;
 	return s;
 }
 /*
  * Store p in slot i of a capture record without disturbing other holders.
  *
  * When the caller is the only holder (ref <= 1) the record is modified in
  * place.  Otherwise a new record is obtained from newsub(), the first
  * s->nsub entries are copied into it, the caller's reference on the old
  * record is dropped, and the write goes to the new record.
  *
  * Returns the record that now holds the value: s itself or its duplicate.
  */
 rsub* update(rsub *s, int i, const char *p)
 {
 	if(s->ref <= 1) {
 		/* not shared: write directly */
 		s->sub[i] = p;
 		return s;
 	}

 	/* shared: duplicate first so the other holders keep their values */
 	rsub *dup = newsub(s->nsub);
 	for(int k = 0; k < s->nsub; k++)
 		dup->sub[k] = s->sub[k];
 	s->ref--;
 	dup->sub[i] = p;
 	return dup;
 }
 /*
  * Drop one reference to s.  When the count reaches zero the record is
  * pushed onto the head of the `freesub` list, with sub[0] reused to hold
  * the link to the previous head, so that newsub() can hand it out again.
  */
 void decref(rsub *s)
 {
 	s->ref -= 1;
 	if(s->ref != 0)
 		return;		/* still referenced elsewhere */

 	s->sub[0] = (char*)freesub;
 	freesub = s;
 }
 int re_classmatch(const int *pc, const char *sp)
 {
 	// pc points to "classnot" byte after opcode
@@ -382,6 +342,7 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
 			split = *(re+1) == '[' ? RSPLIT : SPLIT;
 			for (i = maxcnt-mincnt; i > 0; i--)
 			{
 				prog->splits++;
 				EMIT(PC++, split);
 				EMIT(PC++, REL(PC, PC+((size+2)*i)));
 				if (code)
@@ -414,6 +375,7 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
 			}
 			EMIT(term + 1, REL(term, PC));
 			prog->len++;
 			prog->splits++;
 			term = PC;
 			break;
 		case '*':
@@ -429,6 +391,7 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
 				EMIT(term, SPLIT);
 			}
 			EMIT(term + 1, REL(term, PC));
 			prog->splits++;
 			prog->len += 2;
 			term = PC;
 			break;
@@ -442,6 +405,7 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
 			}
 			EMIT(PC + 1, REL(PC, term));
 			PC += 2;
 			prog->splits++;
 			prog->len++;
 			term = PC;
 			break;
@@ -454,6 +418,7 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
 			alt_label = PC++;
 			EMIT(start, SPLIT);
 			EMIT(start + 1, REL(start, PC));
 			prog->splits++;
 			prog->len += 2;
 			term = PC;
 			break;
@@ -502,6 +467,7 @@ int re_comp(rcode *prog, const char *re, int anchored)
 	prog->len = 0;
 	prog->unilen = 0;
 	prog->sub = 0;
 	prog->splits = 0;
 	// Add code to implement non-anchored operation ("search").
 	// For anchored operation ("match"), this code will be just skipped.
@@ -516,6 +482,7 @@ int re_comp(rcode *prog, const char *re, int anchored)
 		prog->insts[prog->unilen++] = SAVE;
 		prog->insts[prog->unilen++] = 0;
 		prog->len += 4;
 		prog->splits++;
 	}
 	int res = _compilecode(&re, prog, /*sizecode*/0);
 	if (res < 0) return res;
@@ -524,136 +491,141 @@ int re_comp(rcode *prog, const char *re, int anchored)
 	prog->insts[prog->unilen++] = SAVE;
 	prog->insts[prog->unilen++] = 1;
 	prog->insts[prog->unilen++] = MATCH;
 	prog->len += 2;
 	return RE_SUCCESS;
 }
-static void addthread(const int *pbeg, int *plist, int gen, rthreadlist *l,
+#define addthread(nn, list, _pc, _sub, _sp, cont) \
-			 int *pc, rsub *sub, const char *beg, const char *sp)
+{ \
-{
+	int i = 0, j, *pc = _pc; \
-	int i = 0, *pcs[10];
+	rsub *s1, *sub = _sub; \
-	rsub *subs[10];
+	rec##nn: \
-	rec:
+	if(plist[pc - prog->insts] == gen) { \
-	if(plist[pc - pbeg] == gen) {
+		sub->ref--; \
-		decref(sub);
+		rec_check##nn: \
-		rec_check:
+		if (i) { \
-		if (i) {
+			pc = pcs[--i]; \
-			pc = pcs[--i];
+			sub = subs[i]; \
-			sub = subs[i];
+			goto rec##nn; \
-			goto rec;
+		} \
-		}
+		cont; \
-		return;	// already on list
+	} \
-	}
+	plist[pc - prog->insts] = gen; \
-	plist[pc - pbeg] = gen;
+	switch(*pc) { \
-
+	default: \
-	switch(*pc) {
+		list->t[list->n].sub = sub; \
-	default:
+		list->t[list->n++].pc = pc; \
-		l->t[l->n].sub = sub;
+		goto rec_check##nn; \
-		l->t[l->n++].pc = pc;
+	case JMP: \
-		goto rec_check;
+		pc += 2 + pc[1]; \
-	case JMP:
+		goto rec##nn; \
-		pc += 2 + pc[1];
+	case SPLIT: \
-		goto rec;
+		subs[i] = sub; \
-	case SPLIT:
+		sub->ref++; \
-		subs[i] = sub;
+		pc += 2; \
-		sub->ref++;
+		pcs[i++] = pc + pc[-1]; \
-		pc += 2;
+		goto rec##nn; \
-		pcs[i++] = pc + pc[-1];
+	case RSPLIT: \
-		goto rec;
+		subs[i] = sub; \
-	case RSPLIT:
+		sub->ref++; \
-		subs[i] = sub;
+		pc += 2; \
-		sub->ref++;
+		pcs[i++] = pc; \
-		pc += 2;
+		pc += pc[-1]; \
-		pcs[i++] = pc;
+		goto rec##nn; \
-		pc += pc[-1];
+	case SAVE: \
-		goto rec;
+		if (sub->ref > 1) { \
-	case SAVE:
+			for (j = 0; j < subidx; j++) { \
-		sub = update(sub, pc[1], sp);
+				if (nsubs[j].ref <= 0) { \
-		pc += 2;
+					s1 = &nsubs[j]; \
-		goto rec;
+					goto freedsub##nn; \
-	case BOL:
+				} \
-		if(sp != beg)
+			} \
-			goto rec_check;
+			s1 = &nsubs[subidx++]; \
-		pc++; goto rec;
+			freedsub##nn: \
-	case EOL:
+			for (j = 0; j < nsubp; j++) \
-		if(*sp)
+				s1->sub[j] = sub->sub[j]; \
-			goto rec_check;
+			sub = s1; \
-		pc++; goto rec;
+			sub->ref = 1; \
-	}
+		} \
-}
+		sub->sub[pc[1]] = _sp; \
 		pc += 2; \
 		goto rec##nn; \
 	case BOL: \
 		if(_sp != s) \
 			goto rec_check##nn; \
 		pc++; goto rec##nn; \
 	case EOL: \
 		if(*(_sp)) \
 			goto rec_check##nn; \
 		pc++; goto rec##nn; \
 	} \
 } \
 int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
 {
-	int i, c, l, gen, *pc;
+	int i, c, l, *npc, gen = 1, subidx = 1;
 	const char *sp;
 	rsub nsubs[256];
 	int plist[prog->unilen];
-	rsub *sub, *matched = NULL;
+	int *pcs[prog->splits];
 	rsub *subs[prog->splits];
 	rsub *nsub = nsubs, *matched = NULL;
 	rthreadlist _clist[1+prog->len]; 
 	rthreadlist _nlist[1+prog->len]; 
 	rthreadlist *clist = _clist, *nlist = _nlist, *tmp;
 	memset(plist, 0, prog->unilen*sizeof(plist[0]));
 	memset(clist, 0, (1+prog->len)*sizeof(rthread));
 	memset(nlist, 0, (1+prog->len)*sizeof(rthread));
 	nsub->ref = 1;
-	subidx = 0;
+	for(i=0; i<nsubp; i++) {
 	freesub = NULL;
 	for(i=0; i<nsubp; i++)
 		subp[i] = NULL;
-	sub = newsub(nsubp);
+		nsub->sub[i] = NULL;
-	for(i=0; i<nsubp; i++)
+	}
 		sub->sub[i] = NULL;
 	gen = 1;
-	addthread(prog->insts, plist, gen, clist, prog->insts, sub, s, s);
+	while (1)
 		addthread(1, clist, prog->insts, nsub, s, break)
 	for(sp=s;; sp += l) {
 		if(clist->n == 0)
 			break;
 		gen++; uc_len(l, s)
 		for(i=0; i<clist->n; i++) {
-			pc = clist->t[i].pc;
+			npc = clist->t[i].pc;
-			sub = clist->t[i].sub;
+			nsub = clist->t[i].sub;
-			if (inst_is_consumer(*pc) && !*sp) {
+			if (inst_is_consumer(*npc) && !*sp) {
 				// If we need to match a character, but there's none left,
 				// it's fail (we don't schedule current thread for continuation)
-				decref(sub);
+				nsub->ref--;
 				continue;
 			}
-			switch(*pc++) {
+			switch(*npc++) {
 			case CHAR:
 				uc_code(c, sp)
-				if(c != *pc++) {
+				if(c != *npc++)
 					decref(sub);
 					break;
 				}
 			case ANY:
 			addthread:
-				addthread(prog->insts, plist, gen, nlist, pc, sub, s, sp+l);
+				addthread(2, nlist, npc, nsub, sp+l, continue)
 				break;
 			case CLASS:
-				if (!re_classmatch(pc, sp)) {
+				if (!re_classmatch(npc, sp))
 					decref(sub);
 					break;
-				}
+				npc += *(npc+1) * 2 + 2;
 				pc += *(pc+1) * 2 + 2;
 				goto addthread;
 			case NAMEDCLASS:
-				if (!re_namedclassmatch(pc, sp)) {
+				if (!re_namedclassmatch(npc, sp))
 					decref(sub);
 					break;
-				}
+				npc++;
 				pc++;
 				goto addthread;
 			case MATCH:
-				if(matched)
+				matched = nsub;
 					decref(matched);
 				matched = sub;
 				for(i++; i < clist->n; i++)
-					decref(clist->t[i].sub);
+					clist->t[i].sub->ref--;
 				goto BreakFor;
 			}
 			nsub->ref--;
 		}
 	BreakFor:
 		tmp = clist;
@@ -664,7 +636,6 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
 	if(matched) {
 		for(i=0; i<nsubp; i++)
 			subp[i] = matched->sub[i];
 		decref(matched);
 		return 1;
 	}
 	return 0;
@@ -679,21 +650,24 @@ int main(int argc, char *argv[])
 	int sz = re_sizecode(argv[1]) * sizeof(int);
 	printf("Precalculated size: %d\n", sz);
 	char code[sizeof(rcode)+sz];
-	rcode *_code = (rcode*)&code;
+	rcode *_code = (rcode*)code;
 	if (re_comp(_code, argv[1], 0))
 		re_fatal("Error in re_comp");
 	re_dumpcode(_code);
 	#include <time.h>
 	if (argc > 2) {
 		int sub_els = (_code->sub + 1) * 2;
 		const char *sub[sub_els];
 		for (int i = 2; i < argc; i++) {
 			printf("sub depth %d\n", subidx);
 			printf("input bytelen: %d\n", strlen(argv[i]));
 			clock_t start_time = clock();
 			if(!re_pikevm(_code, argv[i], sub, sub_els))
 				{ printf("-nomatch-\n"); continue; }
 			for(int k=sub_els; k>0; k--)
 				if(sub[k-1])
 					break;
 			double elapsed_time = (double)(clock() - start_time) / CLOCKS_PER_SEC;
 			printf("Done in %f seconds\n", elapsed_time);
 			for(int l=0; l<sub_els; l+=2) {
 				printf("(");
 				if(sub[l] == NULL)
@@ -709,7 +683,6 @@ int main(int argc, char *argv[])
 			}
 			printf("\n");
 		}
 	}
 	return 0;
 }
--- a/test.sh
+++ b/test.sh
@@ -46,6 +46,8 @@ b[^c]*
 ([^abc])|(a+)
 [a-g]+
 [а-г]+
 called|chief|dust|familiar|forth|waif|campaign|divers|smile|notice|kill|human|stands|nightshade|dollar|doughty|gloaming|twist|July|officers|wrest|coop|one|ability|welcome|significance|writer|spring|it's|helped|set|Paris|from|coomb|stay|hummock|taken|anon|makes|boat|nearly|am|justice|further|expression|contemporary|sooth|order|about|question|lived|apply|educational|of|night|satisfy|opened|never|success|until|visit|promise|parts|beneath|matter|typical|bade|apartment|rapidly|primary|bring|throat|hold|laws|understand|trade|desire|material|evidence|another|often|plash|model|someone|bond|hell|relationship|probably|exercise|performance|wants|known|countries|gammer|leeward|took|itself|representative|objection|aircraft
 abc+h+d+f
 "
 input="\
 abcdef
@@ -93,6 +95,8 @@ abc
 aaaa
 aaaabcdefghij
 ааааабвг...
 hhfd h23  performance
 abcccccccccccchdf
 "
 expect="\
 (0,3)
@@ -140,18 +144,22 @@ expect="\
 (0,4)(?,?)(0,4)
 (0,10)
 (0,16)
 (10,21)
 (0,17)
 (0,0)
 "
 c=1
 echo "$regex" | tr '\n' | while read re; do
 	inp=$(echo "$input" | awk -v c=$c 'BEGIN{ RS = "" ; FS = "\n" }{print $c}')
 	exp=$(echo "$expect" | awk -v c=$c 'BEGIN{ RS = "" ; FS = "\n" }{print $c}')
-	var=$(echo $(./a.out "$re" "$inp" | awk 'END{print}'))
+	var=$(./a.out "$re" "$inp")
-	if [ ! "$exp" = "$var" ]; then
+	var1=$(echo "$var" | tail -1)
-		echo "fail test$c regex:$re input:$inp expect:$exp output:$var"
+	if [ ! "$exp" = "$var1" ]; then
 		echo "fail test$c regex:$re input:$inp expect:$exp output:$var1"
 		exit 1
 	fi
-	echo "pass test$c regex:$re input:$inp expect:$exp output:$var"
+	time=$(echo "$var" | tail -2 | head -n1)
 	echo "pass test$c regex:$re input:$inp expect:$exp output:$var1 $time"
 	c=$((c+1))
 done