diff --git a/pike.c b/pike.c
index c371745..905f3dd 100644
--- a/pike.c
+++ b/pike.c
@@ -458,28 +458,13 @@ int re_sizecode(const char *re)
 	return dummyprog.unilen;
 }
 
-int re_comp(rcode *prog, const char *re, int anchored)
+int re_comp(rcode *prog, const char *re)
 {
 	prog->len = 0;
 	prog->unilen = 0;
 	prog->sub = 0;
 	prog->splits = 0;
 
-	// Add code to implement non-anchored operation ("search").
-	// For anchored operation ("match"), this code will be just skipped.
-	// TODO: Implement search in much more efficient manner
-	if (!anchored) {
-		prog->insts[prog->unilen++] = RSPLIT;
-		prog->insts[prog->unilen++] = 3;
-		prog->insts[prog->unilen++] = ANY;
-		prog->insts[prog->unilen++] = JMP;
-		prog->insts[prog->unilen++] = -5;
-	
-		prog->insts[prog->unilen++] = SAVE;
-		prog->insts[prog->unilen++] = 0;
-		prog->len += 4;
-		prog->splits++;
-	}
 	int res = _compilecode(&re, prog, /*sizecode*/0);
 	if (res < 0) return res;
 	// If unparsed chars left
@@ -493,11 +478,28 @@ int re_comp(rcode *prog, const char *re, int anchored)
 	return RE_SUCCESS;
 }
 
+#define save(nn, csub) /* Give csub a private copy of its capture set when it is shared (ref > 1); uses j, s1, nsubs, subidx and nsubp from the enclosing scope, nn keeps the label unique per expansion. */ \
+if (csub->ref > 1) { \
+	for (j = 0; j < subidx; j++) { /* scan the slots handed out so far */ \
+		if (!nsubs[j].ref) { /* reference count is 0: slot can be reused */ \
+			s1 = &nsubs[j]; \
+			goto freedsub##nn; \
+		} \
+	} \
+	s1 = &nsubs[subidx++]; /* no free slot found: take the next unused one. NOTE(review): subidx is not checked against the size of nsubs here - confirm it cannot overflow. */ \
+	freedsub##nn: \
+	for (j = 0; j < nsubp; j++) /* copy the first nsubp capture pointers */ \
+		s1->sub[j] = csub->sub[j]; \
+	csub->ref--; /* drop this user's reference to the shared set */ \
+	csub = s1; /* continue with the private copy */ \
+	csub->ref = 1; \
+} \
+
 #define addthread(nn, list, _pc, _sub, cont) \
 { \
-	int i = 0, j, *pc = _pc; \
+	int i = 0, *pc = _pc; \
 	const char *_sp = sp+l; \
-	rsub *s1, *sub = _sub; \
+	rsub *sub = _sub; \
 	rec##nn: \
 	if(plist[pc - prog->insts] == gen) { \
 		sub->ref--; \
@@ -532,21 +534,7 @@ int re_comp(rcode *prog, const char *re, int anchored)
 		pc += pc[-1]; \
 		goto rec##nn; \
 	case SAVE: \
-		if (sub->ref > 1) { \
-			for (j = 0; j < subidx; j++) { \
-				if (!nsubs[j].ref) { \
-					s1 = &nsubs[j]; \
-					goto freedsub##nn; \
-				} \
-			} \
-			s1 = &nsubs[subidx++]; \
-			freedsub##nn: \
-			for (j = 0; j < nsubp; j++) \
-				s1->sub[j] = sub->sub[j]; \
-			sub->ref--; \
-			sub = s1; \
-			sub->ref = 1; \
-		} \
+		save(nn, sub) \
 		sub->sub[pc[1]] = _sp; \
 		pc += 2; \
 		goto rec##nn; \
@@ -562,22 +550,27 @@ int re_comp(rcode *prog, const char *re, int anchored)
 	} \
 } \
 
+#define swaplist() /* Exchange the clist and nlist pointers through tmp (all three come from the enclosing scope), then empty the list that is now nlist. */ \
+tmp = clist; \
+clist = nlist; \
+nlist = tmp; \
+nlist->n = 0; /* reset the entry count of the new nlist */ \
+
 int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
 {
-	int i, c, l = 0, *npc, gen = 1, subidx = 1;
+	int i, j, c, l = 0, *npc, gen = 1, subidx = 1;
 	const char *sp = s;
 	rsub nsubs[256];
 	int plist[prog->unilen];
 	int *pcs[prog->splits];
 	rsub *subs[prog->splits];
-	rsub *nsub = nsubs, *matched = NULL;
+	rsub *nsub = nsubs, *lsub = nsub, *matched = NULL, *s1;
 	rthreadlist _clist[1+prog->len]; 
 	rthreadlist _nlist[1+prog->len]; 
 	rthreadlist *clist = _clist, *nlist = _nlist, *tmp;
 	memset(plist, 0, prog->unilen*sizeof(plist[0]));
 	memset(clist, 0, (1+prog->len)*sizeof(rthread));
 	memset(nlist, 0, (1+prog->len)*sizeof(rthread));
-	nsub->ref = 1;
 
 	for(i=0; i<nsubp; i++) {
 		subp[i] = NULL;
@@ -585,8 +578,10 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
 	}
 
 	gen = 1;
-	while (1)
-		addthread(1, clist, prog->insts, nsub, break)
+	nsub->ref = 2;
+	save(0, nsub);
+	nsub->sub[0] = sp;
+	goto jmp_start;
 	for(; clist->n; sp += l) {
 		gen++; uc_len(l, sp)
 		for(i=0; i<clist->n; i++) {
@@ -594,8 +589,11 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
 			nsub = clist->t[i].sub;
 			// If we need to match a character, but there's none left,
 			// it's fail (we don't schedule current thread for continuation)
-			if (inst_is_consumer(*npc) && !*sp)
+			if (inst_is_consumer(*npc) && !*sp) {
+				if (i >= clist->n-1)
+					goto BreakFor;
 				continue;
+			}
 			switch(*npc++) {
 			case CHAR:
 				uc_code(c, sp)
@@ -616,11 +614,19 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
 			}
 			nsub->ref--;
 		}
+		if (!matched) {
+			nsub = lsub;
+			nsub->ref++;
+			save(3, nsub)
+			nsub->sub[0] = sp + l;
+			swaplist()
+			jmp_start:
+			while (1)
+				addthread(1, clist, prog->insts, nsub, break)
+			continue;
+		}
 	BreakFor:
-		tmp = clist;
-		clist = nlist;
-		nlist = tmp;
-		nlist->n = 0;
+		swaplist()
 	}
 	if(matched) {
 		for(i=0; i<nsubp; i++)
@@ -640,7 +646,7 @@ int main(int argc, char *argv[])
 	printf("Precalculated size: %d\n", sz);
 	char code[sizeof(rcode)+sz];
 	rcode *_code = (rcode*)code;
-	if (re_comp(_code, argv[1], 0))
+	if (re_comp(_code, argv[1]))
 		re_fatal("Error in re_comp");
 	re_dumpcode(_code);
 	#include <time.h>
diff --git a/test.sh b/test.sh
index c2c1b98..a6b6d59 100755
--- a/test.sh
+++ b/test.sh
@@ -49,9 +49,18 @@ abc+h+d+f
 [A-Fa-f0-9]{64}
 <tag>[^<]*</tag>
 ^([a-z0-9_.-]+)@([0-9a-z.-]+)\\\\.([a-z.]{2,5})$
+abc\$d
+abc$|cdb
+abc$|c
+^ac|cdb
+^abc+d
+^(abc|kj)
+^(abc|kj)
 \\\\babc
 ab\\\\bd
 \\\\b(as|js)
+([^qwe]*rty)|(asd[^fgh]*)
+([^qwe]*rty+)|(asd[^fgh]*)
 "
 input="\
 abcdef
@@ -102,9 +111,18 @@ abcccccccccccchdf
 bf33d4a0dbbee85061531c9d47e5aae692c0729e5c9c1fa21c46d9bcab5f52c5
 ajdas <tag> sidufisudf hsdfhshdfh sdf </tag> asjdfjs
 veloval596@godpeed.com
+abc
+abccdb
+abcc
+abccdb
+abccdb
+kj
+jhdfh kj hhd
    	   abc
 ab   d
      js hashasd
+qweasd     qqqq fff
+qwehh  sjsjsj rtyyyyyyyyyj sdj
 "
 expect="\
 (0,3)
@@ -155,9 +173,18 @@ expect="\
 (0,64)
 (6,44)
 (0,22)(0,10)(11,18)(19,22)
+-nomatch-
+(3,6)
+(2,3)
+(3,6)
+(0,5)
+(0,2)(0,2)
+-nomatch-
 (7,10)
 -nomatch-
 (5,7)(5,7)
+(3,16)(?,?)(3,16)
+(3,25)(3,25)(?,?)
 (0,0)
 "
 c=1