From 678295f25ee1915d93d80c3650f76e55b7eefbf0 Mon Sep 17 00:00:00 2001
From: Kyryl Melekhin
Date: Wed, 21 Jul 2021 15:39:11 +0000
Subject: [PATCH] add word boundary assert

---
Note: line breaks and indentation in this copy were reconstructed from a
whitespace-collapsed version of the patch, and the leading spaces of the
added test inputs were inferred from the expected match offsets. Verify
whitespace against the repository before applying.

 pike.c | 31 ++++++++++++++++++++++++++++++-
 test.sh | 9 +++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/pike.c b/pike.c
index e2bb77f..c371745 100644
--- a/pike.c
+++ b/pike.c
@@ -47,6 +47,19 @@ else if (~dst & 0x08) \
 else \
 	dst = 0; \
 
+static int isword(const char *s)
+{
+	int c = (unsigned char) s[0];
+	return isalnum(c) || c == '_' || c > 127;
+}
+
+static char *uc_beg(char *beg, char *s)
+{
+	while (s > beg && (((unsigned char) *s) & 0xc0) == 0x80)
+		s--;
+	return s;
+}
+
 typedef struct rinst rinst;
 struct rinst
 {
@@ -84,6 +97,7 @@ enum /* rinst.opcode */
 	ASSERT,
 	BOL,
 	EOL,
+	WBND,
 	// Instructions which take relative offset as arg
 	JMP,
 	SPLIT,
@@ -196,7 +210,13 @@ void re_dumpcode(rcode *prog)
 			printf("save %d\n", code[pc++]);
 			break;
 		case ASSERT:
-			printf("assert %s\n", code[pc++] == BOL ? "bol" : "eol");
+			if (code[pc] == BOL)
+				printf("assert bol\n");
+			else if (code[pc] == EOL)
+				printf("assert eol\n");
+			else if (code[pc] == WBND)
+				printf("assert WBND\n");
+			pc++;
 			break;
 		}
 	}
@@ -215,6 +235,13 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
 		case '\\':
 			re++;
 			if (!*re) goto syntax_error; // Trailing backslash
+			if (*re == 'b') {
+				EMIT(PC++, ASSERT);
+				EMIT(PC++, WBND);
+				prog->len++;
+				term = PC;
+				break;
+			}
 		default:
 			term = PC;
 			EMIT(PC++, CHAR);
@@ -529,6 +556,8 @@ int re_comp(rcode *prog, const char *re, int anchored)
 				goto rec_check##nn; \
 			if(*pc == EOL && *_sp) \
 				goto rec_check##nn; \
+			if(*pc == WBND && isword(sp)) \
+				goto rec_check##nn; \
 			pc++; goto rec##nn; \
 		} \
 	} \
diff --git a/test.sh b/test.sh
index 0e4b33b..c2c1b98 100755
--- a/test.sh
+++ b/test.sh
@@ -49,6 +49,9 @@ abc+h+d+f
 [A-Fa-f0-9]{64}
 [^<]*
 ^([a-z0-9_.-]+)@([0-9a-z.-]+)\\\\.([a-z.]{2,5})$
+\\\\babc
+ab\\\\bd
+\\\\b(as|js)
 "
 input="\
 abcdef
@@ -99,6 +102,9 @@ abcccccccccccchdf
 bf33d4a0dbbee85061531c9d47e5aae692c0729e5c9c1fa21c46d9bcab5f52c5
 ajdas sidufisudf hsdfhshdfh sdf asjdfjs
 veloval596@godpeed.com
+       abc
+ab d
+     js hashasd
 "
 expect="\
 (0,3)
@@ -149,6 +155,9 @@ expect="\
 (0,64)
 (6,44)
 (0,22)(0,10)(11,18)(19,22)
+(7,10)
+-nomatch-
+(5,7)(5,7)
 (0,0)
 "
 c=1