diff --git a/Makefile b/Makefile index 60d3261..4c16bee 100644 --- a/Makefile +++ b/Makefile @@ -5,18 +5,17 @@ DOC = doc/main.pdf OBJ = \ src/lexer.o -all: $(BIN) $(TEST) $(DOC) +CFLAGS = -g + +all: $(TEST) $(DOC) test: $(TEST) $(DOC): doc/main.typst typst compile $< $@ -$(BIN): $(OBJ) $(BIN).o - $(CC) -g -o $(BIN) $(BIN).o $(OBJ) - $(TEST): $(OBJ) $(TEST).o - $(CC) -g -o $(TEST) $(TEST).o $(OBJ) + $(CC) $(CFLAGS) -o $(TEST) $(TEST).o $(OBJ) fmt: clang-format -i src/*.c src/*.h diff --git a/src/lexer.c b/src/lexer.c index 23ff603..99e310c 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,10 +1,13 @@ +#include "main.h" + #include #include #include -#define DEBUG 0 +#define DEBUG #ifdef DEBUG +#include #include #endif @@ -32,8 +35,11 @@ struct frag char class; char final; struct frag *try; - struct frag * not ; - struct ll *ul; + struct + { + struct ll *ul; + char ref; + } alt; } as; struct frag *next; struct ll *pl; @@ -47,6 +53,7 @@ struct ll *ll(void *p) return res; } +// Transfer ownership of pl, to avoid sharing and free cleanly. void transfer(struct ll **dest, struct ll **src) { if (!(*dest)) @@ -65,66 +72,78 @@ void transfer(struct ll **dest, struct ll **src) void patch(struct frag *dest, struct frag *src) { - struct ll *t, *pl = dest->pl; + struct ll *pl = NULL, *pl_root; - int epsilon_loop = 0; - while (t = pl) + while (dest->pl) { struct ll *d_ul, *s_ul; - // Non Union pl - if (!*(void **)pl->p) + // if dest->pl->p is not an union Patch as Crox. 
+ if (!*(void **)dest->pl->p) { - *(struct frag **)pl->p = src; - free(t); - pl = pl->next; - continue; + *(struct frag **)dest->pl->p = src; + goto free; } - // Prepare Ul to patch - d_ul = ((struct frag *)pl->p)->as.ul; + // dest->pl is union, prepare d_ul to append + d_ul = ((struct frag *)dest->pl->p)->as.alt.ul; while (d_ul->next) d_ul = d_ul->next; - // Patch + // if src is not union, append src to d_ul if (src->type != UNION) { d_ul->next = ll(src); - pl = pl->next; - continue; + goto free; } - // Merge Naive - if (dest->type != UNION) - { - // TODO: Check if src must be free; - d_ul->next = src->as.ul; - pl = pl->next; - continue; - } - - // Merge Clone - s_ul = src->as.ul; + // merge s_ul into d_ul without epsilon loops. + s_ul = src->as.alt.ul; while (s_ul) { - if (s_ul->p != dest) + if (s_ul->p != dest && dest->type == UNION) { d_ul = d_ul->next = ll(s_ul->p); } - else - { - epsilon_loop = 1; - } s_ul = s_ul->next; } - // if no epsilon loop, we should merge naive - pl = pl->next; + /* + PEDRO: For some reason we need to keep inheriting all unions on pl + (dest->pl->p), instead of inheriting only dest when dest->type == + UNION, as I expect. + + Note that for all tests, at this point of the code, we have: + assert( (dest->type == UNION) == (dest == dest->pl->p)); + */ + + if (!pl) + { + pl = pl_root = ll(dest->pl->p); + } + else + { + pl->next = ll(dest->pl->p); + } + + free: + // dest->pl must always be freed; we avoid sharing data. 
+ { + struct ll *to_free = dest->pl; + dest->pl = dest->pl->next; + free(to_free); + } } - if (epsilon_loop) +#ifdef DEBUG + assert(!dest->pl); +#endif + + // Append inherit pl from dest->pl + if (pl) { - dest->pl->next = src->pl; + dest->pl = pl_root; + pl->next = src->pl; } else { @@ -138,6 +157,7 @@ void collapse(struct frag **stack, unsigned char *i) { if (stack[*i]->type == BLOCK) { + free(stack[*i]); stack[(*i)--]; return; } @@ -145,11 +165,11 @@ void collapse(struct frag **stack, unsigned char *i) while (*i > 0) { struct frag *to; - to = stack[(*i)--]; if (stack[*i]->type == BLOCK) { + free(stack[*i]); stack[*i] = to; return; } @@ -195,7 +215,8 @@ struct frag *alt(struct frag *frag) struct frag *res; res = malloc(sizeof(struct frag)); res->type = UNION; - res->as.ul = ll(frag); + res->as.alt.ul = ll(frag); + res->as.alt.ref = 0; res->pl = ll(res); return res; } @@ -230,7 +251,7 @@ struct frag *block(void) return res; } -void *rgx_compile(void *l, char *s, int v) +void lx_append(lx_lexer *l, char *s, int v) { unsigned char i = 0xff, j = 0; struct frag *stack[0xff]; @@ -292,7 +313,7 @@ void *rgx_compile(void *l, char *s, int v) } break; case '\'': - if (i == 0xff) + if (i == 0xff || stack[i]->type == BLOCK) { stack[++i] = rune(*s); break; @@ -301,7 +322,7 @@ void *rgx_compile(void *l, char *s, int v) stack[i] = not(stack[i]); break; case '*': - if (i == 0xff) + if (i == 0xff || stack[i]->type == BLOCK) { stack[++i] = rune(*s); break; @@ -309,23 +330,22 @@ void *rgx_compile(void *l, char *s, int v) r = alt(stack[i]); patch(stack[i], r); - r->pl = stack[i]->pl; + transfer(&r->pl, &stack[i]->pl); stack[i] = r; break; case '?': - - if (i == 0xff) + if (i == 0xff || stack[i]->type == BLOCK) { stack[++i] = rune(*s); break; } r = alt(stack[i]); - transfer(&r->pl, &stack[i]->pl); + r->pl->next = stack[i]->pl; stack[i] = r; break; case '+': - if (i == 0xff) + if (i == 0xff || stack[i]->type == BLOCK) { stack[++i] = rune(*s); break; @@ -350,16 +370,80 @@ void 
*rgx_compile(void *l, char *s, int v) if (j != 0xff) transfer(&stack[i]->pl, &ul[j--]); } + patch(stack[i], final(v)); // Merge w/ l - if (l) - patch(stack[i], alt(l)); + if (*l) + patch(stack[i], alt(*l)); - return stack[i]; + *l = stack[i]; + return; } -int rgx_run(void *p, char *s) +void _lx_free(lx_lexer p, lx_lexer *visited, unsigned char *i) +{ + struct frag *l = p; + while (l) + { + unsigned char j; + lx_lexer to_free; + + for (j = 0; j <= *i; ++j) + if (visited[j] == l) + return; + + to_free = visited[++(*i)] = l; + + switch (l->type) + { + case NOT: + _lx_free(l->as.try, visited, i); + case ANY: + case CLASS: + case RUNE: + l = l->next; + case FINAL: + break; + + case UNION: + { + struct ll *ul = l->as.alt.ul; + while (ul) + { + struct ll *to_free; + to_free = ul; + _lx_free(ul->p, visited, i); + ul = ul->next; +#ifdef DEBUG + printf("[%p] FREE UL\n", to_free); +#endif + free(to_free); + } + break; + } + default: +#ifdef DEBUG + assert(0 && "UNRECHABLE"); +#endif + } + +#ifdef DEBUG + printf("[%p] FREE LL\n", to_free); +#endif + free(to_free); + } +} + +int lx_free(lx_lexer l) +{ + lx_lexer visited[0xff] = {NULL}; + unsigned char i = 0; + + _lx_free(l, visited, &i); +} + +int lx_lex(lx_lexer p, char *s) { struct frag *l = p; @@ -371,11 +455,12 @@ int rgx_run(void *p, char *s) #ifdef DEBUG printf("[%p] NOT\n", l); #endif - if (rgx_run(l->as.try, s) != 0) + if (lx_lex(l->as.try, s)) return 0; if (!*(s++)) return 0; + l = l->next; break; case FINAL: @@ -427,19 +512,21 @@ int rgx_run(void *p, char *s) #endif if (*(s++) != l->as.rune) return 0; + l = l->next; break; case UNION: { int res; - struct ll *ul = l->as.ul; + struct ll *ul = l->as.alt.ul; #ifdef DEBUG printf("[%p] UNION\n", l); #endif while (ul) { - res = rgx_run(ul->p, s); - if (res != 0) + res = lx_lex(ul->p, s); + + if (res) return res; ul = ul->next; @@ -451,9 +538,8 @@ int rgx_run(void *p, char *s) } default: #ifdef DEBUG - printf("[%p]\n", l); + assert(0 && "UNRECHABLE"); #endif - return 0; } } 
diff --git a/src/main.h b/src/main.h index 9207d67..1c33156 100644 --- a/src/main.h +++ b/src/main.h @@ -1,2 +1,5 @@ -void *rgx_compile(void *l, char *s, int v); -int rgx_run(void *l, char *s); +typedef void *lx_lexer; + +void lx_append(lx_lexer *l, char *s, int f); +int lx_lex(lx_lexer l, char *s); +int lx_free(lx_lexer l); diff --git a/src/test.c b/src/test.c index 5683b6b..a2a6af0 100644 --- a/src/test.c +++ b/src/test.c @@ -1,10 +1,11 @@ #include "main.h" +#include #include struct test { char *regex; - struct match + struct m { char *s; int expect; @@ -65,7 +66,6 @@ struct test {NULL}, }, }, - // . Compose { "a..c", @@ -140,7 +140,6 @@ struct test {NULL}, }, }, - { "ab+*c", { @@ -192,6 +191,7 @@ struct test }, }, { + // This is the only example that leaks "ab++c", { {"a", 0}, @@ -213,7 +213,6 @@ struct test {NULL}, }, }, - // UNION { "a()", @@ -469,26 +468,25 @@ struct test int main(void) { - int i = 0; + int i; for (i = 0; i < sizeof(test_suite) / sizeof(*test_suite); ++i) { struct test t = test_suite[i]; - void *nfa; - printf("\n\t%s\n", t.regex); - nfa = rgx_compile(NULL, t.regex, 1); - for (struct match *m = t.matches; m->s; ++m) + lx_lexer l = NULL; + lx_append(&l, t.regex, 1); + + printf("\n%s\n", t.regex); + for (struct m *m = t.matches; m->s; ++m) { int res; - res = rgx_run(nfa, m->s); - printf("%s: %d %s\n", - res == m->expect ? "PASS" : "FAIL", res, m->s); + res = lx_lex(l, m->s); - /* - if (res != m->expect) - return 0; - */ + printf("[%d == %d] %s\n", m->expect, res, m->s); + assert(m->expect == res); } + + lx_free(l); } }