Reduce leaks ad maximum, only leaks on a++

This commit is contained in:
PedroEdiaz
2026-01-08 22:39:05 -06:00
parent a15e1c212a
commit 9c4acf9519
4 changed files with 166 additions and 80 deletions

View File

@@ -5,18 +5,17 @@ DOC = doc/main.pdf
OBJ = \
src/lexer.o
all: $(BIN) $(TEST) $(DOC)
CFLAGS = -g
all: $(TEST) $(DOC)
test: $(TEST)
$(DOC): doc/main.typst
typst compile $< $@
$(BIN): $(OBJ) $(BIN).o
$(CC) -g -o $(BIN) $(BIN).o $(OBJ)
$(TEST): $(OBJ) $(TEST).o
$(CC) -g -o $(TEST) $(TEST).o $(OBJ)
$(CC) $(CFLAGS) -o $(TEST) $(TEST).o $(OBJ)
fmt:
clang-format -i src/*.c src/*.h

View File

@@ -1,10 +1,13 @@
#include "main.h"
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#define DEBUG 0
#define DEBUG
#ifdef DEBUG
#include <assert.h>
#include <stdio.h>
#endif
@@ -32,8 +35,11 @@ struct frag
char class;
char final;
struct frag *try;
struct frag * not ;
struct ll *ul;
struct
{
struct ll *ul;
char ref;
} alt;
} as;
struct frag *next;
struct ll *pl;
@@ -47,6 +53,7 @@ struct ll *ll(void *p)
return res;
}
// Transfer ownership of pl, to avoid sharing and free cleanly.
void transfer(struct ll **dest, struct ll **src)
{
if (!(*dest))
@@ -65,66 +72,78 @@ void transfer(struct ll **dest, struct ll **src)
void patch(struct frag *dest, struct frag *src)
{
struct ll *t, *pl = dest->pl;
struct ll *pl = NULL, *pl_root;
int epsilon_loop = 0;
while (t = pl)
while (dest->pl)
{
struct ll *d_ul, *s_ul;
// Non Union pl
if (!*(void **)pl->p)
// if dest->pl->p is not an union Patch as Crox.
if (!*(void **)dest->pl->p)
{
*(struct frag **)pl->p = src;
free(t);
pl = pl->next;
continue;
*(struct frag **)dest->pl->p = src;
goto free;
}
// Prepare Ul to patch
d_ul = ((struct frag *)pl->p)->as.ul;
// dest->pl is union, prepare d_ul to append
d_ul = ((struct frag *)dest->pl->p)->as.alt.ul;
while (d_ul->next)
d_ul = d_ul->next;
// Patch
// if src is not union, append src to d_ul
if (src->type != UNION)
{
d_ul->next = ll(src);
pl = pl->next;
continue;
goto free;
}
// Merge Naive
if (dest->type != UNION)
{
// TODO: Check if src must be free;
d_ul->next = src->as.ul;
pl = pl->next;
continue;
}
// Merge Clone
s_ul = src->as.ul;
// merge s_ul do d_ul with out epsilon_loops.
s_ul = src->as.alt.ul;
while (s_ul)
{
if (s_ul->p != dest)
if (s_ul->p != dest && dest->type == UNION)
{
d_ul = d_ul->next = ll(s_ul->p);
}
else
{
epsilon_loop = 1;
}
s_ul = s_ul->next;
}
// if no epsilon loop, we should merge naive
pl = pl->next;
/*
PEDRO: For some reason we need to keep inherit all unions on pl
(dest->pl->p), insted of inherit only dest when dest->type ==
UNION, as I expect.
Note that for all test, at this point of the code, we have:
assert( (dest->type == UNION) == (dest == dest->pl->p));
*/
if (!pl)
{
pl = pl_root = ll(dest->pl->p);
}
else
{
pl->next = ll(dest->pl->p);
}
free:
// dest_pl must be free always, we avoid sharing data.
{
struct ll *to_free = dest->pl;
dest->pl = dest->pl->next;
free(to_free);
}
}
if (epsilon_loop)
#ifdef DEBUG
assert(!dest->pl);
#endif
// Append inherit pl from dest->pl
if (pl)
{
dest->pl->next = src->pl;
dest->pl = pl_root;
pl->next = src->pl;
}
else
{
@@ -138,6 +157,7 @@ void collapse(struct frag **stack, unsigned char *i)
{
if (stack[*i]->type == BLOCK)
{
free(stack[*i]);
stack[(*i)--];
return;
}
@@ -145,11 +165,11 @@ void collapse(struct frag **stack, unsigned char *i)
while (*i > 0)
{
struct frag *to;
to = stack[(*i)--];
if (stack[*i]->type == BLOCK)
{
free(stack[*i]);
stack[*i] = to;
return;
}
@@ -195,7 +215,8 @@ struct frag *alt(struct frag *frag)
struct frag *res;
res = malloc(sizeof(struct frag));
res->type = UNION;
res->as.ul = ll(frag);
res->as.alt.ul = ll(frag);
res->as.alt.ref = 0;
res->pl = ll(res);
return res;
}
@@ -230,7 +251,7 @@ struct frag *block(void)
return res;
}
void *rgx_compile(void *l, char *s, int v)
void lx_append(lx_lexer *l, char *s, int v)
{
unsigned char i = 0xff, j = 0;
struct frag *stack[0xff];
@@ -292,7 +313,7 @@ void *rgx_compile(void *l, char *s, int v)
}
break;
case '\'':
if (i == 0xff)
if (i == 0xff || stack[i]->type == BLOCK)
{
stack[++i] = rune(*s);
break;
@@ -301,7 +322,7 @@ void *rgx_compile(void *l, char *s, int v)
stack[i] = not(stack[i]);
break;
case '*':
if (i == 0xff)
if (i == 0xff || stack[i]->type == BLOCK)
{
stack[++i] = rune(*s);
break;
@@ -309,23 +330,22 @@ void *rgx_compile(void *l, char *s, int v)
r = alt(stack[i]);
patch(stack[i], r);
r->pl = stack[i]->pl;
transfer(&r->pl, &stack[i]->pl);
stack[i] = r;
break;
case '?':
if (i == 0xff)
if (i == 0xff || stack[i]->type == BLOCK)
{
stack[++i] = rune(*s);
break;
}
r = alt(stack[i]);
transfer(&r->pl, &stack[i]->pl);
r->pl->next = stack[i]->pl;
stack[i] = r;
break;
case '+':
if (i == 0xff)
if (i == 0xff || stack[i]->type == BLOCK)
{
stack[++i] = rune(*s);
break;
@@ -350,16 +370,80 @@ void *rgx_compile(void *l, char *s, int v)
if (j != 0xff)
transfer(&stack[i]->pl, &ul[j--]);
}
patch(stack[i], final(v));
// Merge w/ l
if (l)
patch(stack[i], alt(l));
if (*l)
patch(stack[i], alt(*l));
return stack[i];
*l = stack[i];
return;
}
int rgx_run(void *p, char *s)
void _lx_free(lx_lexer p, lx_lexer *visited, unsigned char *i)
{
struct frag *l = p;
while (l)
{
unsigned char j;
lx_lexer to_free;
for (j = 0; j <= *i; ++j)
if (visited[j] == l)
return;
to_free = visited[++(*i)] = l;
switch (l->type)
{
case NOT:
_lx_free(l->as.try, visited, i);
case ANY:
case CLASS:
case RUNE:
l = l->next;
case FINAL:
break;
case UNION:
{
struct ll *ul = l->as.alt.ul;
while (ul)
{
struct ll *to_free;
to_free = ul;
_lx_free(ul->p, visited, i);
ul = ul->next;
#ifdef DEBUG
printf("[%p] FREE UL\n", to_free);
#endif
free(to_free);
}
break;
}
default:
#ifdef DEBUG
assert(0 && "UNRECHABLE");
#endif
}
#ifdef DEBUG
printf("[%p] FREE LL\n", to_free);
#endif
free(to_free);
}
}
int lx_free(lx_lexer l)
{
lx_lexer visited[0xff] = {NULL};
unsigned char i = 0;
_lx_free(l, visited, &i);
}
int lx_lex(lx_lexer p, char *s)
{
struct frag *l = p;
@@ -371,11 +455,12 @@ int rgx_run(void *p, char *s)
#ifdef DEBUG
printf("[%p] NOT\n", l);
#endif
if (rgx_run(l->as.try, s) != 0)
if (lx_lex(l->as.try, s))
return 0;
if (!*(s++))
return 0;
l = l->next;
break;
case FINAL:
@@ -427,19 +512,21 @@ int rgx_run(void *p, char *s)
#endif
if (*(s++) != l->as.rune)
return 0;
l = l->next;
break;
case UNION:
{
int res;
struct ll *ul = l->as.ul;
struct ll *ul = l->as.alt.ul;
#ifdef DEBUG
printf("[%p] UNION\n", l);
#endif
while (ul)
{
res = rgx_run(ul->p, s);
if (res != 0)
res = lx_lex(ul->p, s);
if (res)
return res;
ul = ul->next;
@@ -451,9 +538,8 @@ int rgx_run(void *p, char *s)
}
default:
#ifdef DEBUG
printf("[%p]\n", l);
assert(0 && "UNRECHABLE");
#endif
return 0;
}
}

View File

@@ -1,2 +1,5 @@
void *rgx_compile(void *l, char *s, int v);
int rgx_run(void *l, char *s);
typedef void *lx_lexer;
void lx_append(lx_lexer *l, char *s, int f);
int lx_lex(lx_lexer l, char *s);
int lx_free(lx_lexer l);

View File

@@ -1,10 +1,11 @@
#include "main.h"
#include <assert.h>
#include <stdio.h>
struct test
{
char *regex;
struct match
struct m
{
char *s;
int expect;
@@ -65,7 +66,6 @@ struct test
{NULL},
},
},
// . Compose
{
"a..c",
@@ -140,7 +140,6 @@ struct test
{NULL},
},
},
{
"ab+*c",
{
@@ -192,6 +191,7 @@ struct test
},
},
{
// This is the only example that leaks
"ab++c",
{
{"a", 0},
@@ -213,7 +213,6 @@ struct test
{NULL},
},
},
// UNION
{
"a()",
@@ -469,26 +468,25 @@ struct test
int main(void)
{
int i = 0;
int i;
for (i = 0; i < sizeof(test_suite) / sizeof(*test_suite); ++i)
{
struct test t = test_suite[i];
void *nfa;
printf("\n\t%s\n", t.regex);
nfa = rgx_compile(NULL, t.regex, 1);
for (struct match *m = t.matches; m->s; ++m)
lx_lexer l = NULL;
lx_append(&l, t.regex, 1);
printf("\n%s\n", t.regex);
for (struct m *m = t.matches; m->s; ++m)
{
int res;
res = rgx_run(nfa, m->s);
printf("%s: %d %s\n",
res == m->expect ? "PASS" : "FAIL", res, m->s);
res = lx_lex(l, m->s);
/*
if (res != m->expect)
return 0;
*/
printf("[%d == %d] %s\n", m->expect, res, m->s);
assert(m->expect == res);
}
lx_free(l);
}
}