Reduce leaks as much as possible; the only remaining leak is on a++

This commit is contained in:
PedroEdiaz
2026-01-08 22:39:05 -06:00
parent a15e1c212a
commit 9c4acf9519
4 changed files with 166 additions and 80 deletions

View File

@@ -5,18 +5,17 @@ DOC = doc/main.pdf
OBJ = \ OBJ = \
src/lexer.o src/lexer.o
all: $(BIN) $(TEST) $(DOC) CFLAGS = -g
all: $(TEST) $(DOC)
test: $(TEST) test: $(TEST)
$(DOC): doc/main.typst $(DOC): doc/main.typst
typst compile $< $@ typst compile $< $@
$(BIN): $(OBJ) $(BIN).o
$(CC) -g -o $(BIN) $(BIN).o $(OBJ)
$(TEST): $(OBJ) $(TEST).o $(TEST): $(OBJ) $(TEST).o
$(CC) -g -o $(TEST) $(TEST).o $(OBJ) $(CC) $(CFLAGS) -o $(TEST) $(TEST).o $(OBJ)
fmt: fmt:
clang-format -i src/*.c src/*.h clang-format -i src/*.c src/*.h

View File

@@ -1,10 +1,13 @@
#include "main.h"
#include <ctype.h> #include <ctype.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#define DEBUG 0 #define DEBUG
#ifdef DEBUG #ifdef DEBUG
#include <assert.h>
#include <stdio.h> #include <stdio.h>
#endif #endif
@@ -32,8 +35,11 @@ struct frag
char class; char class;
char final; char final;
struct frag *try; struct frag *try;
struct frag * not ; struct
struct ll *ul; {
struct ll *ul;
char ref;
} alt;
} as; } as;
struct frag *next; struct frag *next;
struct ll *pl; struct ll *pl;
@@ -47,6 +53,7 @@ struct ll *ll(void *p)
return res; return res;
} }
// Transfer ownership of pl, to avoid sharing and free cleanly.
void transfer(struct ll **dest, struct ll **src) void transfer(struct ll **dest, struct ll **src)
{ {
if (!(*dest)) if (!(*dest))
@@ -65,66 +72,78 @@ void transfer(struct ll **dest, struct ll **src)
void patch(struct frag *dest, struct frag *src) void patch(struct frag *dest, struct frag *src)
{ {
struct ll *t, *pl = dest->pl; struct ll *pl = NULL, *pl_root;
int epsilon_loop = 0; while (dest->pl)
while (t = pl)
{ {
struct ll *d_ul, *s_ul; struct ll *d_ul, *s_ul;
// Non Union pl // if dest->pl->p is not an union Patch as Crox.
if (!*(void **)pl->p) if (!*(void **)dest->pl->p)
{ {
*(struct frag **)pl->p = src; *(struct frag **)dest->pl->p = src;
free(t); goto free;
pl = pl->next;
continue;
} }
// Prepare Ul to patch // dest->pl is union, prepare d_ul to append
d_ul = ((struct frag *)pl->p)->as.ul; d_ul = ((struct frag *)dest->pl->p)->as.alt.ul;
while (d_ul->next) while (d_ul->next)
d_ul = d_ul->next; d_ul = d_ul->next;
// Patch // if src is not union, append src to d_ul
if (src->type != UNION) if (src->type != UNION)
{ {
d_ul->next = ll(src); d_ul->next = ll(src);
pl = pl->next; goto free;
continue;
} }
// Merge Naive // merge s_ul into d_ul without epsilon loops.
if (dest->type != UNION) s_ul = src->as.alt.ul;
{
// TODO: Check if src must be free;
d_ul->next = src->as.ul;
pl = pl->next;
continue;
}
// Merge Clone
s_ul = src->as.ul;
while (s_ul) while (s_ul)
{ {
if (s_ul->p != dest) if (s_ul->p != dest && dest->type == UNION)
{ {
d_ul = d_ul->next = ll(s_ul->p); d_ul = d_ul->next = ll(s_ul->p);
} }
else
{
epsilon_loop = 1;
}
s_ul = s_ul->next; s_ul = s_ul->next;
} }
// if no epsilon loop, we should merge naive /*
pl = pl->next; PEDRO: For some reason we need to keep inheriting all unions on pl
(dest->pl->p), instead of inheriting only dest when dest->type ==
UNION, as I expected.
Note that for all tests, at this point of the code, we have:
assert( (dest->type == UNION) == (dest == dest->pl->p));
*/
if (!pl)
{
pl = pl_root = ll(dest->pl->p);
}
else
{
pl->next = ll(dest->pl->p);
}
free:
// dest->pl must always be freed; we avoid sharing data.
{
struct ll *to_free = dest->pl;
dest->pl = dest->pl->next;
free(to_free);
}
} }
if (epsilon_loop) #ifdef DEBUG
assert(!dest->pl);
#endif
// Append inherit pl from dest->pl
if (pl)
{ {
dest->pl->next = src->pl; dest->pl = pl_root;
pl->next = src->pl;
} }
else else
{ {
@@ -138,6 +157,7 @@ void collapse(struct frag **stack, unsigned char *i)
{ {
if (stack[*i]->type == BLOCK) if (stack[*i]->type == BLOCK)
{ {
free(stack[*i]);
stack[(*i)--]; stack[(*i)--];
return; return;
} }
@@ -145,11 +165,11 @@ void collapse(struct frag **stack, unsigned char *i)
while (*i > 0) while (*i > 0)
{ {
struct frag *to; struct frag *to;
to = stack[(*i)--]; to = stack[(*i)--];
if (stack[*i]->type == BLOCK) if (stack[*i]->type == BLOCK)
{ {
free(stack[*i]);
stack[*i] = to; stack[*i] = to;
return; return;
} }
@@ -195,7 +215,8 @@ struct frag *alt(struct frag *frag)
struct frag *res; struct frag *res;
res = malloc(sizeof(struct frag)); res = malloc(sizeof(struct frag));
res->type = UNION; res->type = UNION;
res->as.ul = ll(frag); res->as.alt.ul = ll(frag);
res->as.alt.ref = 0;
res->pl = ll(res); res->pl = ll(res);
return res; return res;
} }
@@ -230,7 +251,7 @@ struct frag *block(void)
return res; return res;
} }
void *rgx_compile(void *l, char *s, int v) void lx_append(lx_lexer *l, char *s, int v)
{ {
unsigned char i = 0xff, j = 0; unsigned char i = 0xff, j = 0;
struct frag *stack[0xff]; struct frag *stack[0xff];
@@ -292,7 +313,7 @@ void *rgx_compile(void *l, char *s, int v)
} }
break; break;
case '\'': case '\'':
if (i == 0xff) if (i == 0xff || stack[i]->type == BLOCK)
{ {
stack[++i] = rune(*s); stack[++i] = rune(*s);
break; break;
@@ -301,7 +322,7 @@ void *rgx_compile(void *l, char *s, int v)
stack[i] = not(stack[i]); stack[i] = not(stack[i]);
break; break;
case '*': case '*':
if (i == 0xff) if (i == 0xff || stack[i]->type == BLOCK)
{ {
stack[++i] = rune(*s); stack[++i] = rune(*s);
break; break;
@@ -309,23 +330,22 @@ void *rgx_compile(void *l, char *s, int v)
r = alt(stack[i]); r = alt(stack[i]);
patch(stack[i], r); patch(stack[i], r);
r->pl = stack[i]->pl; transfer(&r->pl, &stack[i]->pl);
stack[i] = r; stack[i] = r;
break; break;
case '?': case '?':
if (i == 0xff || stack[i]->type == BLOCK)
if (i == 0xff)
{ {
stack[++i] = rune(*s); stack[++i] = rune(*s);
break; break;
} }
r = alt(stack[i]); r = alt(stack[i]);
transfer(&r->pl, &stack[i]->pl); r->pl->next = stack[i]->pl;
stack[i] = r; stack[i] = r;
break; break;
case '+': case '+':
if (i == 0xff) if (i == 0xff || stack[i]->type == BLOCK)
{ {
stack[++i] = rune(*s); stack[++i] = rune(*s);
break; break;
@@ -350,16 +370,80 @@ void *rgx_compile(void *l, char *s, int v)
if (j != 0xff) if (j != 0xff)
transfer(&stack[i]->pl, &ul[j--]); transfer(&stack[i]->pl, &ul[j--]);
} }
patch(stack[i], final(v)); patch(stack[i], final(v));
// Merge w/ l // Merge w/ l
if (l) if (*l)
patch(stack[i], alt(l)); patch(stack[i], alt(*l));
return stack[i]; *l = stack[i];
return;
} }
/*
 * Recursively free the fragment graph reachable from p.
 *
 * p       - node to start from; a NULL p makes the loop body never run.
 * visited - array of nodes that have already been seen; a node is recorded
 *           here before it is freed so that it is never freed twice.
 * i       - index of the last used slot in visited; shared across the whole
 *           recursion and advanced each time a new node is recorded.
 *
 * NOTE(review): *i is incremented without any bounds check, and both *i and
 * the scan index j are unsigned char. The caller visible in this file passes
 * a 0xff-element array, so a graph with more nodes than that would write past
 * the array -- confirm an upper bound on graph size or add a guard.
 * NOTE(review): only the union lists (as.alt.ul) and the nodes themselves are
 * released here; the pl member is not touched -- confirm it is already empty
 * by the time this runs.
 */
int rgx_run(void *p, char *s) void _lx_free(lx_lexer p, lx_lexer *visited, unsigned char *i)
{
struct frag *l = p;
while (l)
{
unsigned char j;
lx_lexer to_free;
// Stop as soon as we reach a node that was already recorded (cycle or
// shared node): it has been, or is being, freed by an earlier visit.
for (j = 0; j <= *i; ++j)
if (visited[j] == l)
return;
// Record the node first, then remember it so it can be freed after l
// has been advanced to its successor.
to_free = visited[++(*i)] = l;
switch (l->type)
{
case NOT:
// Free the sub-graph hanging off the NOT node, then fall through to
// advance to l->next like the other single-successor nodes.
_lx_free(l->as.try, visited, i);
case ANY:
case CLASS:
case RUNE:
// Read the successor before the node is freed below; falls through
// to the break under FINAL.
l = l->next;
case FINAL:
// l is left unchanged here, so the next loop iteration finds it in
// visited and returns.
break;
case UNION:
{
struct ll *ul = l->as.alt.ul;
// Free every alternative's sub-graph and the list cell that points
// to it. l is not advanced, so the loop ends on the next iteration
// through the visited check.
while (ul)
{
struct ll *to_free;
to_free = ul;
_lx_free(ul->p, visited, i);
// Step to the next cell before releasing the current one.
ul = ul->next;
#ifdef DEBUG
printf("[%p] FREE UL\n", to_free);
#endif
free(to_free);
}
break;
}
default:
// NOTE(review): with DEBUG undefined this label is directly followed by
// the closing brace of the switch -- confirm the file is always built
// with DEBUG defined, or add a statement here.
#ifdef DEBUG
assert(0 && "UNRECHABLE");
#endif
}
#ifdef DEBUG
printf("[%p] FREE LL\n", to_free);
#endif
// Release the node recorded at the top of this iteration.
free(to_free);
}
}
/*
 * Free the lexer graph rooted at l.
 *
 * Sets up the bookkeeping that _lx_free needs (a zero-initialised array of
 * already-visited nodes and the index of its last used slot) and hands the
 * root over to it. Slot 0 of the array stays NULL; _lx_free records nodes
 * starting at slot 1.
 *
 * l - root of the lexer to release; the caller must not use it afterwards.
 *
 * Returns 0. The function is declared int, so it must not fall off the end
 * without a value: a caller that reads the result would otherwise hit
 * undefined behaviour.
 */
int lx_free(lx_lexer l)
{
    lx_lexer visited[0xff] = {NULL};
    unsigned char i = 0;

    _lx_free(l, visited, &i);
    return 0;
}
int lx_lex(lx_lexer p, char *s)
{ {
struct frag *l = p; struct frag *l = p;
@@ -371,11 +455,12 @@ int rgx_run(void *p, char *s)
#ifdef DEBUG #ifdef DEBUG
printf("[%p] NOT\n", l); printf("[%p] NOT\n", l);
#endif #endif
if (rgx_run(l->as.try, s) != 0) if (lx_lex(l->as.try, s))
return 0; return 0;
if (!*(s++)) if (!*(s++))
return 0; return 0;
l = l->next; l = l->next;
break; break;
case FINAL: case FINAL:
@@ -427,19 +512,21 @@ int rgx_run(void *p, char *s)
#endif #endif
if (*(s++) != l->as.rune) if (*(s++) != l->as.rune)
return 0; return 0;
l = l->next; l = l->next;
break; break;
case UNION: case UNION:
{ {
int res; int res;
struct ll *ul = l->as.ul; struct ll *ul = l->as.alt.ul;
#ifdef DEBUG #ifdef DEBUG
printf("[%p] UNION\n", l); printf("[%p] UNION\n", l);
#endif #endif
while (ul) while (ul)
{ {
res = rgx_run(ul->p, s); res = lx_lex(ul->p, s);
if (res != 0)
if (res)
return res; return res;
ul = ul->next; ul = ul->next;
@@ -451,9 +538,8 @@ int rgx_run(void *p, char *s)
} }
default: default:
#ifdef DEBUG #ifdef DEBUG
printf("[%p]\n", l); assert(0 && "UNRECHABLE");
#endif #endif
return 0;
} }
} }

View File

@@ -1,2 +1,5 @@
void *rgx_compile(void *l, char *s, int v); typedef void *lx_lexer;
int rgx_run(void *l, char *s);
void lx_append(lx_lexer *l, char *s, int f);
int lx_lex(lx_lexer l, char *s);
int lx_free(lx_lexer l);

View File

@@ -1,10 +1,11 @@
#include "main.h" #include "main.h"
#include <assert.h>
#include <stdio.h> #include <stdio.h>
struct test struct test
{ {
char *regex; char *regex;
struct match struct m
{ {
char *s; char *s;
int expect; int expect;
@@ -65,7 +66,6 @@ struct test
{NULL}, {NULL},
}, },
}, },
// . Compose // . Compose
{ {
"a..c", "a..c",
@@ -140,7 +140,6 @@ struct test
{NULL}, {NULL},
}, },
}, },
{ {
"ab+*c", "ab+*c",
{ {
@@ -192,6 +191,7 @@ struct test
}, },
}, },
{ {
// This is the only example that leaks
"ab++c", "ab++c",
{ {
{"a", 0}, {"a", 0},
@@ -213,7 +213,6 @@ struct test
{NULL}, {NULL},
}, },
}, },
// UNION // UNION
{ {
"a()", "a()",
@@ -469,26 +468,25 @@ struct test
int main(void) int main(void)
{ {
int i = 0; int i;
for (i = 0; i < sizeof(test_suite) / sizeof(*test_suite); ++i) for (i = 0; i < sizeof(test_suite) / sizeof(*test_suite); ++i)
{ {
struct test t = test_suite[i]; struct test t = test_suite[i];
void *nfa;
printf("\n\t%s\n", t.regex); lx_lexer l = NULL;
nfa = rgx_compile(NULL, t.regex, 1); lx_append(&l, t.regex, 1);
for (struct match *m = t.matches; m->s; ++m)
printf("\n%s\n", t.regex);
for (struct m *m = t.matches; m->s; ++m)
{ {
int res; int res;
res = rgx_run(nfa, m->s); res = lx_lex(l, m->s);
printf("%s: %d %s\n",
res == m->expect ? "PASS" : "FAIL", res, m->s);
/* printf("[%d == %d] %s\n", m->expect, res, m->s);
if (res != m->expect) assert(m->expect == res);
return 0;
*/
} }
lx_free(l);
} }
} }