This commit is contained in:
Kyryl Melekhin
2021-07-30 16:47:53 +00:00
parent 5b64234754
commit 4dc1b1b535

42
pike.c
View File

@@ -60,23 +60,6 @@ static char *uc_beg(char *beg, char *s)
return s; return s;
} }
typedef struct rinst rinst;
struct rinst
{
int opcode;
int c;
int n;
rinst *x;
rinst *y;
};
typedef struct rprog rprog;
struct rprog
{
rinst *start;
int len;
};
typedef struct rcode rcode; typedef struct rcode rcode;
struct rcode struct rcode
{ {
@@ -87,12 +70,13 @@ struct rcode
int insts[]; int insts[];
}; };
enum /* rinst.opcode */ enum
{ {
// Instructions which consume input bytes (and thus fail if none left) // Instructions which consume input bytes (and thus fail if none left)
CHAR = 1, CHAR = 1,
ANY, ANY,
CLASS, CLASS,
MATCH,
// Assert position // Assert position
ASSERT, ASSERT,
BOL, BOL,
@@ -103,7 +87,6 @@ enum /* rinst.opcode */
RSPLIT, RSPLIT,
// Other (special) instructions // Other (special) instructions
SAVE, SAVE,
MATCH,
}; };
// Return codes for re_sizecode() and re_comp() // Return codes for re_sizecode() and re_comp()
@@ -114,7 +97,6 @@ enum {
RE_UNSUPPORTED_SYNTAX = -4, RE_UNSUPPORTED_SYNTAX = -4,
}; };
#define inst_is_consumer(inst) ((inst) < ASSERT)
typedef struct rsub rsub; typedef struct rsub rsub;
struct rsub struct rsub
{ {
@@ -462,7 +444,9 @@ int re_comp(rcode *prog, const char *re)
prog->insts[prog->unilen++] = SAVE; prog->insts[prog->unilen++] = SAVE;
prog->insts[prog->unilen++] = 1; prog->insts[prog->unilen++] = 1;
prog->insts[prog->unilen++] = MATCH; prog->insts[prog->unilen++] = MATCH;
prog->len += 2; prog->insts[prog->unilen++] = CHAR;
prog->insts[prog->unilen++] = 0;
prog->len += 3;
return RE_SUCCESS; return RE_SUCCESS;
} }
@@ -501,11 +485,12 @@ if (csub->ref > 1) { \
cont; \ cont; \
} \ } \
plist[pc - prog->insts] = gen; \ plist[pc - prog->insts] = gen; \
switch(*pc) { \ if (*pc < ASSERT) { \
default: \
list->t[list->n].sub = sub; \ list->t[list->n].sub = sub; \
list->t[list->n++].pc = pc; \ list->t[list->n++].pc = pc; \
goto rec_check##nn; \ goto rec_check##nn; \
} \
switch(*pc) { \
case JMP: \ case JMP: \
pc += 2 + pc[1]; \ pc += 2 + pc[1]; \
goto rec##nn; \ goto rec##nn; \
@@ -574,19 +559,14 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
for(i=0; i<clist->n; i++) { for(i=0; i<clist->n; i++) {
npc = clist->t[i].pc; npc = clist->t[i].pc;
nsub = clist->t[i].sub; nsub = clist->t[i].sub;
// If we need to match a character, but there's none left,
// it's fail (we don't schedule current thread for continuation)
if (inst_is_consumer(*npc) && !*sp) {
if (i >= clist->n-1)
goto break_for;
continue;
}
switch(*npc++) { switch(*npc++) {
case CHAR: case CHAR:
if(c != *npc++) if(c != *npc++)
break; break;
case ANY: case ANY:
addthread: addthread:
if (!c)
continue;
addthread(2, nlist, npc, nsub, continue) addthread(2, nlist, npc, nsub, continue)
case CLASS: case CLASS:
if (!re_classmatch(npc, c)) if (!re_classmatch(npc, c))
@@ -600,7 +580,7 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
} }
nsub->ref--; nsub->ref--;
} }
if (!matched) { if (!matched && c) {
nsub = lsub; nsub = lsub;
nsub->ref++; nsub->ref++;
save(3, nsub) save(3, nsub)