Combine the BOL and EOL asserts into a single ASSERT opcode

This commit is contained in:
Kyryl Melekhin
2021-07-21 12:38:58 +00:00
parent f855e7d545
commit ba17d90916

37
pike.c
View File

@@ -19,14 +19,14 @@ const unsigned char utf8_length[256] = {
/* 5 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 5 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 6 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 7 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 7 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* A */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* B */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* C */ 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* C */ 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* D */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* D */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* E */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* E */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
/* F */ 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* F */ 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
}; };
/* return the length of a utf-8 character */ /* return the length of a utf-8 character */
@@ -81,6 +81,7 @@ enum /* rinst.opcode */
ANY, ANY,
CLASS, CLASS,
// Assert position // Assert position
ASSERT,
BOL, BOL,
EOL, EOL,
// Instructions which take relative offset as arg // Instructions which take relative offset as arg
@@ -100,7 +101,7 @@ enum {
RE_UNSUPPORTED_SYNTAX = -4, RE_UNSUPPORTED_SYNTAX = -4,
}; };
#define inst_is_consumer(inst) ((inst) < BOL) #define inst_is_consumer(inst) ((inst) < ASSERT)
typedef struct rsub rsub; typedef struct rsub rsub;
struct rsub struct rsub
{ {
@@ -194,11 +195,8 @@ void re_dumpcode(rcode *prog)
case SAVE: case SAVE:
printf("save %d\n", code[pc++]); printf("save %d\n", code[pc++]);
break; break;
case BOL: case ASSERT:
printf("assert bol\n"); printf("assert %s\n", code[pc++] == BOL ? "bol" : "eol");
break;
case EOL:
printf("assert eol\n");
break; break;
} }
} }
@@ -392,11 +390,13 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
term = PC; term = PC;
break; break;
case '^': case '^':
EMIT(PC++, ASSERT);
EMIT(PC++, BOL); EMIT(PC++, BOL);
prog->len++; prog->len++;
term = PC; term = PC;
break; break;
case '$': case '$':
EMIT(PC++, ASSERT);
EMIT(PC++, EOL); EMIT(PC++, EOL);
prog->len++; prog->len++;
term = PC; term = PC;
@@ -523,12 +523,11 @@ int re_comp(rcode *prog, const char *re, int anchored)
sub->sub[pc[1]] = _sp; \ sub->sub[pc[1]] = _sp; \
pc += 2; \ pc += 2; \
goto rec##nn; \ goto rec##nn; \
case BOL: \ case ASSERT: \
if(_sp != s) \ pc++; \
if(*pc == BOL && _sp != s) \
goto rec_check##nn; \ goto rec_check##nn; \
pc++; goto rec##nn; \ if(*pc == EOL && *_sp) \
case EOL: \
if(*_sp) \
goto rec_check##nn; \ goto rec_check##nn; \
pc++; goto rec##nn; \ pc++; goto rec##nn; \
} \ } \
@@ -564,12 +563,10 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
for(i=0; i<clist->n; i++) { for(i=0; i<clist->n; i++) {
npc = clist->t[i].pc; npc = clist->t[i].pc;
nsub = clist->t[i].sub; nsub = clist->t[i].sub;
if (inst_is_consumer(*npc) && !*sp) {
// If we need to match a character, but there's none left, // If we need to match a character, but there's none left,
// it's a failure (we don't schedule current thread for continuation) // it's a failure (we don't schedule current thread for continuation)
nsub->ref--; if (inst_is_consumer(*npc) && !*sp)
continue; continue;
}
switch(*npc++) { switch(*npc++) {
case CHAR: case CHAR:
uc_code(c, sp) uc_code(c, sp)