add word boundary assert
This commit is contained in:
31
pike.c
31
pike.c
@@ -47,6 +47,19 @@ else if (~dst & 0x08) \
|
||||
else \
|
||||
dst = 0; \
|
||||
|
||||
/*
 * Report whether the byte at s[0] counts as a "word" character.
 *
 * A byte is a word character when it is alphanumeric (per isalnum), an
 * underscore, or has a value above 127 (i.e. any non-ASCII byte).
 *
 * Only s[0] is examined, so s may point at the terminating NUL, in which
 * case the result is 0.
 *
 * Returns 1 for a word character, 0 otherwise.
 */
static int isword(const char *s)
{
	/* Go through unsigned char: passing a negative char to isalnum is UB. */
	int c = (unsigned char) s[0];

	return isalnum(c) || c == '_' || c > 127;
}
|
||||
|
||||
/*
 * Step backwards from s while the byte under s has the bit pattern
 * 10xxxxxx (the UTF-8 continuation-byte pattern), never moving before beg.
 *
 * beg: lower bound for the scan; the byte at beg itself is never tested
 *      once s has reached it.
 * s:   starting position, expected to be >= beg.
 *
 * Returns the first position at or before s whose byte is not of the
 * form 10xxxxxx, or beg if the scan reaches it first.
 */
static char *uc_beg(char *beg, char *s)
{
	while (s > beg && (((unsigned char) *s) & 0xc0) == 0x80) {
		s--;
	}
	return s;
}
|
||||
|
||||
typedef struct rinst rinst;
|
||||
struct rinst
|
||||
{
|
||||
@@ -84,6 +97,7 @@ enum /* rinst.opcode */
|
||||
ASSERT,
|
||||
BOL,
|
||||
EOL,
|
||||
WBND,
|
||||
// Instructions which take relative offset as arg
|
||||
JMP,
|
||||
SPLIT,
|
||||
@@ -196,7 +210,13 @@ void re_dumpcode(rcode *prog)
|
||||
printf("save %d\n", code[pc++]);
|
||||
break;
|
||||
case ASSERT:
|
||||
printf("assert %s\n", code[pc++] == BOL ? "bol" : "eol");
|
||||
if (code[pc] == BOL)
|
||||
printf("assert bol\n");
|
||||
else if (code[pc] == EOL)
|
||||
printf("assert eol\n");
|
||||
else if (code[pc] == WBND)
|
||||
printf("assert WBND\n");
|
||||
pc++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -215,6 +235,13 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
|
||||
case '\\':
|
||||
re++;
|
||||
if (!*re) goto syntax_error; // Trailing backslash
|
||||
if (*re == 'b') {
|
||||
EMIT(PC++, ASSERT);
|
||||
EMIT(PC++, WBND);
|
||||
prog->len++;
|
||||
term = PC;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
term = PC;
|
||||
EMIT(PC++, CHAR);
|
||||
@@ -529,6 +556,8 @@ int re_comp(rcode *prog, const char *re, int anchored)
|
||||
goto rec_check##nn; \
|
||||
if(*pc == EOL && *_sp) \
|
||||
goto rec_check##nn; \
|
||||
if(*pc == WBND && isword(sp)) \
|
||||
goto rec_check##nn; \
|
||||
pc++; goto rec##nn; \
|
||||
} \
|
||||
} \
|
||||
|
||||
9
test.sh
9
test.sh
@@ -49,6 +49,9 @@ abc+h+d+f
|
||||
[A-Fa-f0-9]{64}
|
||||
<tag>[^<]*</tag>
|
||||
^([a-z0-9_.-]+)@([0-9a-z.-]+)\\\\.([a-z.]{2,5})$
|
||||
\\\\babc
|
||||
ab\\\\bd
|
||||
\\\\b(as|js)
|
||||
"
|
||||
input="\
|
||||
abcdef
|
||||
@@ -99,6 +102,9 @@ abcccccccccccchdf
|
||||
bf33d4a0dbbee85061531c9d47e5aae692c0729e5c9c1fa21c46d9bcab5f52c5
|
||||
ajdas <tag> sidufisudf hsdfhshdfh sdf </tag> asjdfjs
|
||||
veloval596@godpeed.com
|
||||
abc
|
||||
ab d
|
||||
js hashasd
|
||||
"
|
||||
expect="\
|
||||
(0,3)
|
||||
@@ -149,6 +155,9 @@ expect="\
|
||||
(0,64)
|
||||
(6,44)
|
||||
(0,22)(0,10)(11,18)(19,22)
|
||||
(7,10)
|
||||
-nomatch-
|
||||
(5,7)(5,7)
|
||||
(0,0)
|
||||
"
|
||||
c=1
|
||||
|
||||
Reference in New Issue
Block a user