diff --git a/pike.c b/pike.c
index e2bb77f..c371745 100644
--- a/pike.c
+++ b/pike.c
@@ -47,6 +47,19 @@ else if (~dst & 0x08) \
else \
dst = 0; \
+static int isword(const char *s) /* returns 1 if the first byte of s counts as a word character, else 0 */
+{
+ int c = (unsigned char) s[0]; /* go through unsigned char so isalnum() never receives a negative value */
+ return isalnum(c) || c == '_' || c > 127; /* alphanumeric, underscore, or any byte above 127 (presumably so non-ASCII / multi-byte text counts as word text -- confirm) */
+}
+
+static char *uc_beg(char *beg, char *s) /* walks s backwards, stopping at beg or at the first byte not of the form 10xxxxxx; returns that pointer */
+{
+ while (s > beg && (((unsigned char) *s) & 0xc0) == 0x80) /* top two bits 10: the UTF-8 continuation-byte pattern */
+ s--; /* the s > beg guard is checked first, so s never moves below beg */
+ return s;
+}
+
typedef struct rinst rinst;
struct rinst
{
@@ -84,6 +97,7 @@ enum /* rinst.opcode */
ASSERT,
BOL,
EOL,
+ WBND,
// Instructions which take relative offset as arg
JMP,
SPLIT,
@@ -196,7 +210,13 @@ void re_dumpcode(rcode *prog)
printf("save %d\n", code[pc++]);
break;
case ASSERT:
- printf("assert %s\n", code[pc++] == BOL ? "bol" : "eol");
+ if (code[pc] == BOL)
+ printf("assert bol\n");
+ else if (code[pc] == EOL)
+ printf("assert eol\n");
+ else if (code[pc] == WBND)
+ printf("assert WBND\n");
+ pc++;
break;
}
}
@@ -215,6 +235,13 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
case '\\':
re++;
if (!*re) goto syntax_error; // Trailing backslash
+ if (*re == 'b') {
+ EMIT(PC++, ASSERT);
+ EMIT(PC++, WBND);
+ prog->len++;
+ term = PC;
+ break;
+ }
default:
term = PC;
EMIT(PC++, CHAR);
@@ -529,6 +556,8 @@ int re_comp(rcode *prog, const char *re, int anchored)
goto rec_check##nn; \
if(*pc == EOL && *_sp) \
goto rec_check##nn; \
+ if(*pc == WBND && isword(sp)) \
+ goto rec_check##nn; \
pc++; goto rec##nn; \
} \
} \
diff --git a/test.sh b/test.sh
index 0e4b33b..c2c1b98 100755
--- a/test.sh
+++ b/test.sh
@@ -49,6 +49,9 @@ abc+h+d+f
[A-Fa-f0-9]{64}
[^<]*
^([a-z0-9_.-]+)@([0-9a-z.-]+)\\\\.([a-z.]{2,5})$
+\\\\babc
+ab\\\\bd
+\\\\b(as|js)
"
input="\
abcdef
@@ -99,6 +102,9 @@ abcccccccccccchdf
bf33d4a0dbbee85061531c9d47e5aae692c0729e5c9c1fa21c46d9bcab5f52c5
ajdas sidufisudf hsdfhshdfh sdf asjdfjs
veloval596@godpeed.com
+ abc
+ab d
+ js hashasd
"
expect="\
(0,3)
@@ -149,6 +155,9 @@ expect="\
(0,64)
(6,44)
(0,22)(0,10)(11,18)(19,22)
+(7,10)
+-nomatch-
+(5,7)(5,7)
(0,0)
"
c=1