From a15e1c212a3a79e4cbf35051c83130337d37d54e Mon Sep 17 00:00:00 2001 From: PedroEdiaz Date: Fri, 26 Dec 2025 12:29:26 -0600 Subject: [PATCH] Static check and remove for epsilon loops --- .clang-format | 15 ++ .gitignore | 2 + Makefile | 14 +- src/lexer.c | 545 ++++++++++++++++++++++++++++++++++---------------- src/main.c | 34 ---- src/main.h | 11 +- src/test.c | 522 +++++++++++++++++++++++++++++++++++++++++------ 7 files changed, 866 insertions(+), 277 deletions(-) create mode 100644 .clang-format create mode 100644 .gitignore delete mode 100644 src/main.c diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..816917a --- /dev/null +++ b/.clang-format @@ -0,0 +1,15 @@ +# Tabs +UseTab: ForContinuationAndIndentation #ForIndentation +# Sized +TabWidth: 8 +IndentWidth: 8 +ContinuationIndentWidth: 8 + +# Column Limit +ColumnLimit: 80 + +# Functions +AllowAllArgumentsOnNextLine: false + +# Allman +BreakBeforeBraces: Allman diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..05fab23 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +src/*.o +src/test diff --git a/Makefile b/Makefile index 8ded207..60d3261 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,25 @@ -BIN = src/main TEST = src/test +DOC = doc/main.pdf + OBJ = \ src/lexer.o -#all: $(BIN) $(TEST) +all: $(BIN) $(TEST) $(DOC) test: $(TEST) +$(DOC): doc/main.typst + typst compile $< $@ + $(BIN): $(OBJ) $(BIN).o $(CC) -g -o $(BIN) $(BIN).o $(OBJ) $(TEST): $(OBJ) $(TEST).o $(CC) -g -o $(TEST) $(TEST).o $(OBJ) + +fmt: + clang-format -i src/*.c src/*.h + +clean: + rm $(OBJ) $(BIN) diff --git a/src/lexer.c b/src/lexer.c index c71a869..23ff603 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,147 +1,279 @@ -#include -#include #include -#include "main.h" +#include +#include -struct patch_list +#define DEBUG 0 + +#ifdef DEBUG +#include +#endif + +struct ll { - struct rgx_nfa **ptr; - struct patch_list *next; + void *p; + struct ll *next; }; -struct frag +struct frag { - struct rgx_nfa *start; - struct patch_list *pl; -}; - -struct patch_list * pl_append(struct patch_list * pl, struct rgx_nfa ** nfa) -{ - struct patch_list * res, * root = pl; - res = malloc(sizeof(struct patch_list)); - *res = (struct patch_list){nfa, NULL}; - - if (pl == 0 ) { - return res; - } - - while (pl->next) { - pl = pl->next; - } - - pl->next = res; - - return root; -} - -void pl_merge(struct patch_list ** pl1, struct patch_list * pl2) -{ - struct patch_list * pl = *pl1; - - if (!pl ) { - *pl1 = pl2; - return; - } - - while (pl->next) { - pl = pl->next; - } - - pl->next = pl2; - -} - -struct rgx_nfa * patch(struct patch_list *pl, struct rgx_nfa *to) -{ - while (pl) { - *pl->ptr = to; - pl = pl->next; - } - return to; -} - -void collapse(struct frag * stack, unsigned char * i) -{ - if (stack[*i].start->op == BLOCK ) { - stack[(*i)--] ; - return; - } - - while (*i > 0 ) + enum { - struct frag to; + RUNE, + CLASS, + NOT, + ANY, + FINAL, + UNION, + BLOCK + } type; + union + { + char rune; + char class; + char final; + struct frag *try; + struct frag * not ; + struct ll *ul; + } as; + struct frag *next; + struct ll *pl; +}; - to = stack[(*i)--]; - - if (stack[*i].start->op == BLOCK ) { - stack[*i]= to; - return; - } - - patch(stack[*i].pl, to.start); - stack[*i].pl = to.pl; - } -} - -struct rgx_nfa * mknfa(struct rgx_nfa nfa) +struct ll *ll(void *p) { - struct rgx_nfa * res; - res = malloc(sizeof(struct rgx_nfa)); - *res = nfa; + struct ll *res; + res = malloc(sizeof(struct ll)); + *res = (struct ll){.p = p, .next = NULL}; return res; } -struct rgx_nfa * rgx_compile(struct rgx_nfa * l, char * s, int v) { - unsigned char i=-1; - struct frag stack[0xff]; - struct patch_list * union_pl = NULL; - - if (l) { - struct rgx_nfa *r; - r = mknfa((struct rgx_nfa){UNION, {l, NULL}, 1}); - stack[++i] = (struct frag){r, pl_append(NULL, &r->node[1])}; +void transfer(struct ll **dest, struct ll **src) +{ + if (!(*dest)) + { + *dest = *src; + *src = NULL; + return; } - for(;*s; ++s) - { - struct rgx_nfa *r; - struct frag a, b; + while ((*dest)->next) + dest = &((*dest)->next); - switch(*s) + (*dest)->next = *src; + *src = NULL; +} + +void patch(struct frag *dest, struct frag *src) +{ + struct ll *t, *pl = dest->pl; + + int epsilon_loop = 0; + while (t = pl) + { + struct ll *d_ul, *s_ul; + + // Non Union pl + if (!*(void **)pl->p) + { + *(struct frag **)pl->p = src; + free(t); + pl = pl->next; + continue; + } + + // Prepare Ul to patch + d_ul = ((struct frag *)pl->p)->as.ul; + while (d_ul->next) + d_ul = d_ul->next; + + // Patch + if (src->type != UNION) + { + d_ul->next = ll(src); + pl = pl->next; + continue; + } + + // Merge Naive + if (dest->type != UNION) + { + // TODO: Check if src must be free; + d_ul->next = src->as.ul; + pl = pl->next; + continue; + } + + // Merge Clone + s_ul = src->as.ul; + while (s_ul) + { + if (s_ul->p != dest) + { + d_ul = d_ul->next = ll(s_ul->p); + } + else + { + epsilon_loop = 1; + } + s_ul = s_ul->next; + } + + // if no epsilon loop, we should merge naive + pl = pl->next; + } + + if (epsilon_loop) + { + dest->pl->next = src->pl; + } + else + { + dest->pl = src->pl; + } + + src->pl = NULL; +} + +void collapse(struct frag **stack, unsigned char *i) +{ + if (stack[*i]->type == BLOCK) + { + stack[(*i)--]; + return; + } + + while (*i > 0) + { + struct frag *to; + + to = stack[(*i)--]; + + if (stack[*i]->type == BLOCK) + { + stack[*i] = to; + return; + } + + patch(stack[*i], to); + } +} + +/* Frag Helpers */ + +struct frag * final(char final) +{ + struct frag *res; + res = malloc(sizeof(struct frag)); + res->type = FINAL; + res->as.final = final; + return res; +} + +struct frag *rune(char rune) +{ + struct frag *res; + res = malloc(sizeof(struct frag)); + res->type = RUNE; + res->as.rune = rune; + res->next = NULL; + res->pl = ll(&res->next); + return res; +} + +struct frag *any() +{ + struct frag *res; + res = malloc(sizeof(struct frag)); + res->type = ANY; + res->next = NULL; + res->pl = ll(&res->next); + return res; +} + +struct frag *alt(struct frag *frag) +{ + struct frag *res; + res = malloc(sizeof(struct frag)); + res->type = UNION; + res->as.ul = ll(frag); + res->pl = ll(res); + return res; +} + +struct frag * not(struct frag * frag) +{ + struct frag *res; + res = malloc(sizeof(struct frag)); + res->type = NOT; + res->as.try = frag; + res->next = NULL; + res->pl = ll(&res->next); + return res; +} + +struct frag *class(char class) +{ + struct frag *res; + res = malloc(sizeof(struct frag)); + res->type = CLASS; + res->as.class = class; + res->next = NULL; + res->pl = ll(&res->next); + return res; +} + +struct frag *block(void) +{ + struct frag *res; + res = malloc(sizeof(struct frag)); + res->type = BLOCK; + return res; +} + +void *rgx_compile(void *l, char *s, int v) +{ + unsigned char i = 0xff, j = 0; + struct frag *stack[0xff]; + struct ll *ul[0xff] = {NULL}; + + for (; *s; ++s) + { + struct frag *r; + + switch (*s) { case '(': - r = mknfa((struct rgx_nfa){BLOCK, {NULL}, 0}); - stack[++i] = (struct frag){r, NULL }; + stack[++i] = block(); + ul[++j] = NULL; break; case ')': collapse(stack, &i); - pl_merge(&stack[i].pl, union_pl); - union_pl=NULL; + + if (j != 0xff) + transfer(&stack[i]->pl, &ul[j--]); + break; case '|': collapse(stack, &i); - pl_merge(&union_pl, stack[i].pl); + transfer(&ul[j], &stack[i]->pl); + stack[i] = alt(stack[i]); - if (i!= 0) { + if (i != 0xff) + { + struct frag *a; a = stack[i--]; - r = mknfa((struct rgx_nfa){BLOCK, {NULL}, 0}); - stack[++i] = (struct frag){r, NULL }; - stack[++i]= a; + stack[++i] = block(); + stack[++i] = a; } - r = mknfa((struct rgx_nfa){UNION, {stack[i].start, NULL}, 0}); - stack[i] = (struct frag){r, pl_append(NULL, &r->node[1])}; break; - case '\\': - switch(*(++s)) + switch (*(++s)) { case 's': case 'd': case 'w': - a.start = mknfa((struct rgx_nfa){CLASS, {NULL}, *s}); - a.pl = pl_append(NULL, &a.start->node[0]); - stack[++i] = a; + stack[++i] = class(*s); break; case '\\': case '|': @@ -150,112 +282,183 @@ struct rgx_nfa * rgx_compile(struct rgx_nfa * l, char * s, int v) { case '*': case '+': case '?': - a.start = mknfa((struct rgx_nfa){RUNE, {NULL}, *s}); - a.pl = pl_append(NULL, &a.start->node[0]); - stack[++i] = a; + case '\'': + stack[++i] = rune(*s); break; default: - a.start = mknfa((struct rgx_nfa){RUNE, {NULL}, '\\'}); - a.pl = pl_append(NULL, &a.start->node[0]); - stack[++i] = a; - - a.start = mknfa((struct rgx_nfa){RUNE, {NULL}, *s}); - a.pl = pl_append(NULL, &a.start->node[0]); - stack[++i] = a; + stack[++i] = rune('\\'); + stack[++i] = rune(*s); break; } break; - case '*': - r = mknfa((struct rgx_nfa){UNION, {stack[i].start, NULL}, 0}); - patch(stack[i].pl, r); - stack[i] = (struct frag){r, pl_append(NULL, &r->node[1])}; + case '\'': + if (i == 0xff) + { + stack[++i] = rune(*s); + break; + } + patch(stack[i], final(0)); + stack[i] = not(stack[i]); + break; + case '*': + if (i == 0xff) + { + stack[++i] = rune(*s); + break; + } + + r = alt(stack[i]); + patch(stack[i], r); + r->pl = stack[i]->pl; + stack[i] = r; + break; + case '?': + + if (i == 0xff) + { + stack[++i] = rune(*s); + break; + } + + r = alt(stack[i]); + transfer(&r->pl, &stack[i]->pl); + stack[i] = r; break; - case '?': - r = mknfa((struct rgx_nfa){UNION, {stack[i].start, NULL}, 0}); - stack[i] = (struct frag){r, pl_append(stack[i].pl, &r->node[1]) }; - break; case '+': - r = mknfa((struct rgx_nfa){UNION, {stack[i].start, NULL}, 0}); - stack[i].pl = pl_append(NULL, &patch(stack[i].pl, r)->node[1]); + if (i == 0xff) + { + stack[++i] = rune(*s); + break; + } + + r = alt(stack[i]); + patch(stack[i], r); break; case '.': - a.start = mknfa((struct rgx_nfa){ANY, {NULL}}); - a.pl = pl_append(NULL, &a.start->node[0]); - stack[++i] = a; + stack[++i] = any(); break; default: - a.start = mknfa((struct rgx_nfa){RUNE, {NULL}, *s}); - a.pl = pl_append(NULL, &a.start->node[0]); - stack[++i] = a; + stack[++i] = rune(*s); break; } } // Collapse and add FINAL state - while ( i!=0 ) { - if (stack[i].start->op == BLOCK ) - stack[i--]; + while (i) + { collapse(stack, &i); + if (j != 0xff) + transfer(&stack[i]->pl, &ul[j--]); } - pl_merge(&stack[i].pl, union_pl); - union_pl = NULL; + patch(stack[i], final(v)); - patch(stack[i].pl, mknfa((struct rgx_nfa){FINAL, {NULL}, v})); - return stack[i].start; + // Merge w/ l + if (l) + patch(stack[i], alt(l)); + + return stack[i]; } -int rgx_run(struct rgx_nfa *l, char *s) { +int rgx_run(void *p, char *s) +{ + struct frag *l = p; + while (l) { - switch (l->op) + switch (l->type) { - case FINAL: - //printf("FINAL: %c\n", *s? *s:'0'); - return *s ? 0: l->c; - case ANY: - //printf("ANY: %c\n", *s? *s:'0'); - if (!*(s++)) + case NOT: +#ifdef DEBUG + printf("[%p] NOT\n", l); +#endif + if (rgx_run(l->as.try, s) != 0) return 0; - l = l->node[0]; + + if (!*(s++)) + return 0; + l = l->next; + break; + case FINAL: +#ifdef DEBUG + printf("[%p] FINAL: %c\n", l, *s ? *s : '0'); +#endif + if (l->as.final == 0) + return 1; + + return *s ? 0 : l->as.final; + case ANY: +#ifdef DEBUG + printf("[%p] ANY: %c\n", l, *s ? *s : '0'); +#endif + if (!*(s++)) + return 0; + l = l->next; break; case CLASS: - //printf("CLASS(%c): %c\n", l->c, *s? *s: '0'); - switch(l->c) +#ifdef DEBUG + printf("[%p] CLASS(%c): %c\n", l, l->as.class, + *s ? *s : '0'); +#endif + switch (l->as.class) { case 's': - if(!isspace(*(s++))) + if (!isspace(*(s++))) return 0; - l = l->node[0]; + l = l->next; break; case 'd': - if(!isdigit(*(s++))) + if (!isdigit(*(s++))) return 0; - l = l->node[0]; + l = l->next; break; case 'w': - if(!isalpha(*(s++))) + if (!isalpha(*(s++))) return 0; - l = l->node[0]; + l = l->next; break; default: return 0; } break; case RUNE: - //printf("RUNE(%c): %c\n", l->c, *s?*s:'0'); - if (*(s++) != l->c) +#ifdef DEBUG + printf("[%p] RUNE(%c): %c\n", l, l->as.rune, + *s ? *s : '0'); +#endif + if (*(s++) != l->as.rune) return 0; - l = l->node[0]; + l = l->next; break; case UNION: - //printf("{\n"); - int res = rgx_run(l->node[0], s); - //printf("}\n"); - if (res) - return res; - l = l->node[1]; - break; + { + int res; + struct ll *ul = l->as.ul; +#ifdef DEBUG + printf("[%p] UNION\n", l); +#endif + while (ul) + { + res = rgx_run(ul->p, s); + if (res != 0) + return res; + + ul = ul->next; + } +#ifdef DEBUG + printf("[%p] END\n", l); +#endif + return 0; + } + default: +#ifdef DEBUG + printf("[%p]\n", l); +#endif + return 0; } } + +#ifdef DEBUG + printf("[%p]\n", l); +#endif return 0; } diff --git a/src/main.c b/src/main.c deleted file mode 100644 index f949ea7..0000000 --- a/src/main.c +++ /dev/null @@ -1,34 +0,0 @@ -#include -#include "main.h" - -char * tokens[] = -{ - "\"(\\\"|\"!)*\"", - "\\d+", - "L|R|N", - "\\w+", - "\\s+", - "[\\s*", - "\\s*]", - "\\s*:\\s*", - "\\s*\\|\\s*", - "\\s*<\\s*", - "\\s*;", - NULL, -}; - -int main(void) -{ - int i; - struct rgx_nfa * l = NULL; - - //for (i=0; tokens[i]; ++i) { - l=rgx_compile(l, tokens[0], 1); - //} - - printf("\n%d\n", rgx_run(l, "\"\"")) ; - printf("\n%d\n", rgx_run(l, "\"b\"")) ; - printf("\n%d\n", rgx_run(l, "\"bb\"")) ; - printf("\n%d\n", rgx_run(l, "\"\"\"")) ; - printf("\n%d\n", rgx_run(l, "\"\\\"\"")) ; -} diff --git a/src/main.h b/src/main.h index c79a8a8..9207d67 100644 --- a/src/main.h +++ b/src/main.h @@ -1,9 +1,2 @@ -struct rgx_nfa -{ - enum {RUNE, ANY, UNION, FINAL, CLASS, BLOCK} op; - struct rgx_nfa *node[2]; - unsigned char c; -}; - -struct rgx_nfa * rgx_compile(struct rgx_nfa * l, char * s, int v); -int rgx_run(struct rgx_nfa * l, char * s); +void *rgx_compile(void *l, char *s, int v); +int rgx_run(void *l, char *s); diff --git a/src/test.c b/src/test.c index a0fb3a5..5683b6b 100644 --- a/src/test.c +++ b/src/test.c @@ -1,94 +1,494 @@ -#include #include "main.h" +#include struct test { - char * regex; + char *regex; struct match { - char * s; + char *s; int expect; } matches[0xff]; -} test_suite[] = -{ +} test_suite[] = { // Basic - {"abc", {{"a", 0},{"abc", 1}, {"abcd",0 }, {NULL}}}, - {"a.c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"aac", 1}, {NULL}}}, - {"ab+c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 0}, {NULL}}}, - {"ab*c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {NULL}}}, - {"ab?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 0}, {"ac", 1}, {NULL}}}, + { + "abc", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {NULL}, + }, + }, + { + "a.c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"acc", 1}, + {"aac", 1}, + {NULL}, + }, + }, + { + "ab*c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"abbc", 1}, + {"ac", 1}, + {"ab", 0}, + {NULL}, + }, + }, + { + "ab+c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"abbc", 1}, + {"ac", 0}, + {NULL}, + }, + }, + { + "ab?c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"abbc", 0}, + {"ac", 1}, + {NULL}, + }, + }, // . Compose - {"a..c", {{"a", 0},{"abc", 0}, {"abcd",0 }, {"acc", 0}, {"ac", 0}, {"abbc", 1}, {NULL}}}, - {"a.+c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"ac", 0}, {"abbc", 1}, {NULL}}}, - {"a.*c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"ac", 1}, {"abbc", 1}, {NULL}}}, - {"a.?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"ac", 1}, {NULL}}}, - - // TODO: Implement NFA to DFA to avoid loops + { + "a..c", + { + {"a", 0}, + {"abc", 0}, + {"abcd", 0}, + {"acc", 0}, + {"ac", 0}, + {"abbc", 1}, + {NULL}, + }, + }, + { + "a.+c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"acc", 1}, + {"ac", 0}, + {"abbc", 1}, + {NULL}, + }, + }, + { + "a.*c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"acc", 1}, + {"ac", 1}, + {"abbc", 1}, + {NULL}, + }, + }, + { + "a.?c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"acc", 1}, + {"ac", 1}, + {NULL}, + }, + }, // Equal to * - {"ab+?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {NULL}}}, - {"ab*?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {NULL}}}, - //{"ab?+c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"ababc", 1}, {"ac", 1}, {NULL}}}, - //{"ab?*c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"ababc", 1}, {"ac", 1}, {NULL}}}, - {"a+*", {{"aaaa", 1}, {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 1}, {NULL}}}, - //{"a**", {{"aaaa", 1}, {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 1}, {NULL}}}, + { + "ab+?c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"abbc", 1}, + {"ac", 1}, + {"ab", 0}, + {NULL}, + }, + }, + { + "ab*?c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"abbc", 1}, + {"ac", 1}, + {"ab", 0}, + {NULL}, + }, + }, - // Nilpotent - {"ab??c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 0}, {"ac", 1}, {"c", 0}, {NULL}}}, - {"ab++c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 0}, {"c", 0}, {"ababc",0}, {NULL}}}, - //{"ab**c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {"c", 1}, {"ababc",1}, {NULL}}}, + { + "ab+*c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"abbc", 1}, + {"ac", 1}, + {"ab", 0}, + {NULL}, + }, + }, + // Epsilon-Loops + { + "ab?+c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"abbc", 1}, + {"ac", 1}, + {"ab", 0}, + {NULL}, + }, + }, + { + "ab?*c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"abbc", 1}, + {"ac", 1}, + {"ab", 0}, + {NULL}, + }, + }, + // Nillpotent + { + "ab**c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"abbc", 1}, + {"ac", 1}, + {"ab", 0}, + {NULL}, + }, + }, + { + "ab++c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"abbc", 1}, + {"ac", 0}, + {NULL}, + }, + }, + { + "ab??c", + { + {"a", 0}, + {"abc", 1}, + {"abcd", 0}, + {"abbc", 0}, + {"ac", 1}, + {NULL}, + }, + }, // UNION - {"a()", {{"a", 1},{"abc", 0}, {"b", 0 }, {"ba", 0}, {NULL}}}, - {"a(b)", {{"ab", 1},{"abc", 0}, {"b", 0 }, {"ba", 0}, {NULL}}}, - {"a(b)*", {{"ab", 1},{"abb", 1}, {"a", 1 }, {"ba", 0}, {NULL}}}, - {"a|b", {{"a", 1},{"abc", 0}, {"b", 1 }, {"ba", 0}, {NULL}}}, - {"(a|.)*", {{"a", 1},{"abc", 1}, {"b", 1 }, {"ba", 1}, {NULL}}}, - {"(.|a)*", {{"a", 1},{"abc", 1}, {"b", 1 }, {"ba", 1}, {NULL}}}, - {"(b|a)*", {{"a", 1},{"cab", 0}, {"b", 1 }, {"ab", 1}, {"cacb", 0}, {NULL}}}, - {"c(b|a)*", {{"ca", 1},{"cab", 1}, {"b", 0 }, {"bb", 0}, {"cacb", 0}, {NULL}}}, - {"ab|b", {{"ab", 1}, {"b", 1}, {"abb", 0}, {NULL}}}, - {"a|b|c", {{"a", 1}, {"b", 1}, {"c", 1},{"d",0}, {NULL}}}, - {"((a|b)|c)d", {{"ad", 1}, {"bd", 1}, {"cd", 1},{"a",0}, {NULL}}}, + { + "a()", + { + {"a", 1}, + {"abc", 0}, + {"b", 0}, + {"ba", 0}, + {NULL}, + }, + }, + { + "a(b)", + { + {"ab", 1}, + {"abc", 0}, + {"b", 0}, + {"ba", 0}, + {NULL}, + }, + }, + { + "a(b)*", + { + {"ab", 1}, + {"abb", 1}, + {"a", 1}, + {"ba", 0}, + {NULL}, + }, + }, + { + "a|b", + { + {"a", 1}, + {"abc", 0}, + {"b", 1}, + {"ba", 0}, + {NULL}, + }, + }, + { + "a|b*", + { + {"a", 1}, + {"abc", 0}, + {"b", 1}, + {"bb", 1}, + {NULL}, + }, + }, + { + "(a|.)*", + { + {"a", 1}, + {"abc", 1}, + {"b", 1}, + {"ba", 1}, + {NULL}, + }, + }, + { + "(.|a)*", + { + {"a", 1}, + {"abc", 1}, + {"b", 1}, + {"ba", 1}, + {NULL}, + }, + }, + { + "c(b|a)*", + { + {"ca", 1}, + {"cab", 1}, + {"b", 0}, + {"bb", 0}, + {"cacb", 0}, + {NULL}, + }, + }, + { + "ab|b", + { + {"ab", 1}, + {"b", 1}, + {"abb", 0}, + {NULL}, + }, + }, + { + "a|b|c", + { + {"a", 1}, + {"b", 1}, + {"c", 1}, + {"d", 0}, + {NULL}, + }, + }, + { + "((a|b)|c)d", + { + {"ad", 1}, + {"bd", 1}, + {"cd", 1}, + {"a", 0}, + {NULL}, + }, + }, // BLOCK - {"(abb", {{"abb", 1}, {"ab", 0}, {"b", 0}, {NULL}}}, - {"abb)", {{"abb", 1}, {"ab", 0}, {"b", 0}, {NULL}}}, - {"a(bb", {{"abb", 1}, {"ab", 0}, {"b", 0}, {NULL}}}, - {"ab)b", {{"abb", 1}, {"ab", 0}, {"b", 0}, {NULL}}}, - {"(ab)b", {{"abb", 1}, {"ab", 0}, {"b", 0}, {NULL}}}, - {"(ab)+", {{"ab", 1}, {"abab", 1}, {"b", 0}, {NULL}}}, - {"(a(bc)*)", {{"a", 1}, {"abc", 1}, {"abb", 0}, {NULL}}}, - {"(ab|cd)", {{"ab", 1}, {"cd", 1}, {"abcd", 0}, {NULL}}}, - - // REDOS - {"a*b?a*", {{"aaa", 1}, {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaba", 1}, {NULL}}}, -} ; + { + "(abb", + { + {"abb", 1}, + {"ab", 0}, + {"b", 0}, + {NULL}, + }, + }, + { + "abb)", + { + {"abb", 1}, + {"ab", 0}, + {"b", 0}, + {NULL}, + }, + }, + { + "a(bb", + { + {"abb", 1}, + {"ab", 0}, + {"b", 0}, + {NULL}, + }, + }, + { + "ab)b", + { + {"abb", 1}, + {"ab", 0}, + {"b", 0}, + {NULL}, + }, + }, + { + "(ab)b", + { + {"abb", 1}, + {"ab", 0}, + {"b", 0}, + {NULL}, + }, + }, + { + "(ab)+", + { + {"ab", 1}, + {"abab", 1}, + {"b", 0}, + {NULL}, + }, + }, + { + "(a(bc)*)", + { + {"a", 1}, + {"abc", 1}, + {"abb", 0}, + {NULL}, + }, + }, + { + "(ab|cd)", + { + {"ab", 1}, + {"cd", 1}, + {"abcd", 0}, + {NULL}, + }, + }, + // NEG + { + "a'", + { + {"a", 0}, + {"b", 1}, + {NULL}, + }, + }, + { + "a''", + { + {"a", 1}, + {"b", 0}, + {NULL}, + }, + }, + { + "a|b'", + { + {"a", 1}, + {"b", 0}, + {"c", 1}, + {NULL}, + }, + }, + { + "{({|})'*}", + { + {"{}", 1}, + {"{{}", 0}, + {"{}}", 0}, + {NULL}, + }, + }, + { + "\"(\\\\.|(\"|\\\\)')*\"", + { + {"\"\"", 1}, + {"\"\"\"", 0}, + {"\"\\\"", 0}, + {"\"lsk\\\"lsdk\"", 1}, + {NULL}, + }, + }, + { + "\"((\"|\\\\)'|\\\\.)*\"", + { + {"\"\"", 1}, + {"\"\"\"", 0}, + {"\"\\\"", 0}, + {"\"lsk\\\"lsdk\"", 1}, + {NULL}, + }, + }, + { + "\\\\\\*(\\*\\\\)'*)\\*\\\\", + { + {"\\*lskd*\\", 1}, + {"\\****\\", 1}, + {"\\*\\\\*\\", 1}, + {"\\*ls*\\ lsdk *\\", 0}, + {NULL}, + }, + }, +}; int main(void) { - int i=0, j=0; + int i = 0; - for(i =0; is; ++m) + for (struct match *m = t.matches; m->s; ++m) { int res; - res = rgx_run(nfa, m->s); - printf("%s: %d %s\n", res == m->expect?"PASS":"FAIL", res, m->s); + res = rgx_run(nfa, m->s); + printf("%s: %d %s\n", + res == m->expect ? "PASS" : "FAIL", res, m->s); + + /* + if (res != m->expect) + return 0; + */ } } }