Don't put the asserts under the same case; test the repetition operator

This commit is contained in:
Kyryl Melekhin
2021-08-05 11:25:55 +00:00
parent 95a9274ef3
commit 515312b80d
2 changed files with 34 additions and 30 deletions

57
pike.c
View File

@@ -72,11 +72,10 @@ enum
CLASS, CLASS,
MATCH, MATCH,
// Assert position // Assert position
ASSERT,
BOL,
EOL,
WBEG, WBEG,
WEND, WEND,
BOL,
EOL,
// Instructions which take relative offset as arg // Instructions which take relative offset as arg
JMP, JMP,
SPLIT, SPLIT,
@@ -178,16 +177,17 @@ void re_dumpcode(rcode *prog)
case SAVE: case SAVE:
printf("save %d\n", code[pc++]); printf("save %d\n", code[pc++]);
break; break;
case ASSERT: case WBEG:
if (code[pc] == BOL)
printf("assert bol\n");
else if (code[pc] == EOL)
printf("assert eol\n");
else if (code[pc] == WBEG)
printf("assert wbeg\n"); printf("assert wbeg\n");
else if (code[pc] == WEND) break;
case WEND:
printf("assert wend\n"); printf("assert wend\n");
pc++; break;
case BOL:
printf("assert bol\n");
break;
case EOL:
printf("assert eol\n");
break; break;
} }
} }
@@ -207,7 +207,6 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
re++; re++;
if (!*re) goto syntax_error; // Trailing backslash if (!*re) goto syntax_error; // Trailing backslash
if (*re == '<' || *re == '>') { if (*re == '<' || *re == '>') {
EMIT(PC++, ASSERT);
EMIT(PC++, *re == '<' ? WBEG : WEND); EMIT(PC++, *re == '<' ? WBEG : WEND);
prog->len++; prog->len++;
term = PC; term = PC;
@@ -286,7 +285,7 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
break; break;
case '{':; case '{':;
int maxcnt = 0, mincnt = 0, int maxcnt = 0, mincnt = 0,
i = 0, icnt = 0, size, split; i = 0, icnt = 0, size;
re++; re++;
while (isdigit((unsigned char) *re)) while (isdigit((unsigned char) *re))
mincnt = mincnt * 10 + *re++ - '0'; mincnt = mincnt * 10 + *re++ - '0';
@@ -303,11 +302,10 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
memcpy(&code[PC], &code[term], size*sizeof(int)); memcpy(&code[PC], &code[term], size*sizeof(int));
PC += size; PC += size;
} }
split = *(re+1) == '[' ? RSPLIT : SPLIT;
for (i = maxcnt-mincnt; i > 0; i--) for (i = maxcnt-mincnt; i > 0; i--)
{ {
prog->splits++; prog->splits++;
EMIT(PC++, split); EMIT(PC++, SPLIT);
EMIT(PC++, REL(PC, PC+((size+2)*i))); EMIT(PC++, REL(PC, PC+((size+2)*i)));
if (code) if (code)
memcpy(&code[PC], &code[term], size*sizeof(int)); memcpy(&code[PC], &code[term], size*sizeof(int));
@@ -388,13 +386,11 @@ static int _compilecode(const char **re_loc, rcode *prog, int sizecode)
term = PC; term = PC;
break; break;
case '^': case '^':
EMIT(PC++, ASSERT);
EMIT(PC++, BOL); EMIT(PC++, BOL);
prog->len++; prog->len++;
term = PC; term = PC;
break; break;
case '$': case '$':
EMIT(PC++, ASSERT);
EMIT(PC++, EOL); EMIT(PC++, EOL);
prog->len++; prog->len++;
term = PC; term = PC;
@@ -471,7 +467,7 @@ if (--csub->ref == 0) { \
const char *_sp = sp+l; \ const char *_sp = sp+l; \
rsub *sub = _sub; \ rsub *sub = _sub; \
rec##nn: \ rec##nn: \
if (*pc < ASSERT) { \ if (*pc < WBEG) { \
list[listidx].sub = sub; \ list[listidx].sub = sub; \
list[listidx++].pc = pc; \ list[listidx++].pc = pc; \
goto rec_check##nn; \ goto rec_check##nn; \
@@ -517,19 +513,24 @@ if (--csub->ref == 0) { \
sub->sub[pc[1]] = _sp; \ sub->sub[pc[1]] = _sp; \
pc += 2; \ pc += 2; \
goto rec##nn; \ goto rec##nn; \
case ASSERT: \ case WBEG: \
pc++; \ if (!nlistidx && (!isword(_sp) || isword(sp)) \
if (*pc == BOL && _sp != s) { \ && !(sp == s && isword(sp))) \
goto dec_check##nn; \
pc++; goto rec##nn; \
case WEND: \
if (isword(_sp)) \
goto dec_check##nn; \
pc++; goto rec##nn; \
case BOL: \
if (_sp != s) { \
if (!i && !listidx) \ if (!i && !listidx) \
_return(0) \ _return(0) \
goto dec_check##nn; \ goto dec_check##nn; \
} \ } \
if (*pc == EOL && *_sp) \ pc++; goto rec##nn; \
goto dec_check##nn; \ case EOL: \
if (*pc == WBEG && (!isword(_sp) || isword(sp)) \ if (*_sp) \
&& !(sp == s && isword(sp))) \
goto dec_check##nn; \
if (*pc == WEND && isword(_sp)) \
goto dec_check##nn; \ goto dec_check##nn; \
pc++; goto rec##nn; \ pc++; goto rec##nn; \
} \ } \
@@ -555,6 +556,7 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
gen = prog->gen; gen = prog->gen;
goto jmp_start; goto jmp_start;
for(;; sp += l) { for(;; sp += l) {
nlistidx = 0;
gen++; uc_len(l, sp) uc_code(c, sp) gen++; uc_len(l, sp) uc_code(c, sp)
for(i = 0; i < clistidx; i++) { for(i = 0; i < clistidx; i++) {
npc = clist[i].pc; npc = clist[i].pc;
@@ -585,7 +587,6 @@ int re_pikevm(rcode *prog, const char *s, const char **subp, int nsubp)
clist = nlist; clist = nlist;
nlist = tmp; nlist = tmp;
clistidx = nlistidx; clistidx = nlistidx;
nlistidx = 0;
if (!matched) { if (!matched) {
jmp_start: jmp_start:
newsub() newsub()

View File

@@ -90,6 +90,7 @@ abc\\\\>
[-+]?\\\\<(0[xX][0-9a-fA-FUL]+|[0-9.]{1,}[0-9eEfFuULl]+|[0-9]+)\\\\> [-+]?\\\\<(0[xX][0-9a-fA-FUL]+|[0-9.]{1,}[0-9eEfFuULl]+|[0-9]+)\\\\>
[-+]?\\\\<(0[xX][0-9a-fA-FUL]+|[0-9.]{1,}[0-9eEfFuULl]+|[0-9]+)\\\\> [-+]?\\\\<(0[xX][0-9a-fA-FUL]+|[0-9.]{1,}[0-9eEfFuULl]+|[0-9]+)\\\\>
[-+]?\\\\<(0[xX][0-9a-fA-FUL]+|[0-9.]{1,}[0-9eEfFuULl]+|[0-9]+)\\\\> [-+]?\\\\<(0[xX][0-9a-fA-FUL]+|[0-9.]{1,}[0-9eEfFuULl]+|[0-9]+)\\\\>
[-+]?\\\\<(0[xX][0-9a-fA-FUL]+|[0-9.]{1,}[0-9eEfFuULl]+|[0-9]+)\\\\>
qwerty.*$ qwerty.*$
" "
input="\ input="\
@@ -182,6 +183,7 @@ world
3245 jjfjjj 3245 jjfjjj
0x663q 0x663q
x37247 x37247
124435.7727ULL
jjdfjk sjdjjsqwerty jdfjdfhhdhfdjjjfj jjjdf jjdfjk sjdjjsqwerty jdfjdfhhdhfdjjjfj jjjdf
" "
expect="\ expect="\
@@ -274,6 +276,7 @@ expect="\
(3,7)(3,7) (3,7)(3,7)
-nomatch- -nomatch-
-nomatch- -nomatch-
(2,16)(2,16)
(14,44) (14,44)
(0,0) (0,0)
" "