First working regex engine

This commit is contained in:
PedroEdiaz
2025-12-04 21:57:58 -06:00
commit 6acef48f09
5 changed files with 261 additions and 0 deletions

167
src/lexer.c Normal file
View File

@@ -0,0 +1,167 @@
#include <stdlib.h>
#include <stdio.h>
#include "main.h"
/*
 * Singly linked list of outgoing edges that do not have a target yet.
 *
 * Every entry stores the address of a node pointer slot; patch() later
 * writes the destination node into each recorded slot.
 * Member order matters: entries are built with positional initializers.
 */
struct patch_list {
    struct rgx_nfa **ptr;    /* address of the slot to fill in */
    struct patch_list *next; /* following entry, NULL at the tail */
};
/*
 * Partially built piece of the automaton, as kept on the compiler's stack:
 * its entry node plus the list of outgoing slots still waiting for a target.
 * Member order matters: fragments are built with positional initializers.
 */
struct frag {
    struct rgx_nfa *start;  /* entry node of the fragment */
    struct patch_list *out; /* slots to connect to whatever follows */
};
/*
 * Append a new entry holding `nfa` to the tail of the patch list `pl`.
 *
 * pl  - head of an existing list, or NULL for an empty list.
 * nfa - address of the node slot that patch() will fill in later.
 *
 * Returns the head of the resulting list: the new entry itself when `pl`
 * is NULL, otherwise `pl`. The entry is heap-allocated and is not freed
 * by this function.
 *
 * Running out of memory is treated as fatal: the failure is reported with
 * perror() and the process is aborted rather than writing through a NULL
 * pointer.
 */
struct patch_list * append(struct patch_list * pl, struct rgx_nfa ** nfa)
{
    struct patch_list *entry = malloc(sizeof *entry);
    struct patch_list *tail;

    if (entry == NULL) {
        perror("malloc");
        abort();
    }
    entry->ptr = nfa;
    entry->next = NULL;

    if (pl == NULL) {
        return entry;
    }

    /* The list keeps no tail pointer, so walk to the last entry. */
    for (tail = pl; tail->next != NULL; tail = tail->next) {
        /* nothing */
    }
    tail->next = entry;
    return pl;
}
/*
 * Store `to` into every slot recorded in the list `pl`.
 *
 * The list itself is left untouched (nothing is freed). A NULL list is a
 * no-op. Returns `to` so the call can be used inside an expression.
 */
struct rgx_nfa * patch(struct patch_list *pl, struct rgx_nfa *to)
{
    struct patch_list *cur;

    for (cur = pl; cur != NULL; cur = cur->next) {
        *cur->ptr = to;
    }
    return to;
}
/*
 * Allocate a node on the heap and initialise it as a copy of `nfa`.
 *
 * Returns the new node; this file contains no routine that frees it.
 *
 * Running out of memory is treated as fatal: the failure is reported with
 * perror() and the process is aborted rather than writing through a NULL
 * pointer.
 */
struct rgx_nfa * mknfa(struct rgx_nfa nfa)
{
    struct rgx_nfa *node = malloc(sizeof *node);

    if (node == NULL) {
        perror("malloc");
        abort();
    }
    *node = nfa;
    return node;
}
struct rgx_nfa * rgx_compile(struct rgx_nfa * l, char * s, int v) {
struct rgx_nfa *r;
unsigned char i=-1;
struct frag stack[0xff], a, b;
for(;*s; ++s)
{
switch(*s)
{
case '|':
break;
case '*':
b = stack[i--];
if (i==0xff)
{
r = mknfa((struct rgx_nfa){UNION, {b.start, NULL}});
patch(b.out, r);
stack[++i] = (struct frag){r, append(NULL, &r->node[1])};
break;
}
else
{
a = stack[i--];
r = mknfa((struct rgx_nfa){UNION, {b.start, NULL}});
patch(a.out, r);
patch(b.out, r);
a.out = append(NULL, &r->node[1]);
stack[++i] = a;
break;
}
case '?':
b = stack[i--];
if (i==0xff)
{
r = mknfa((struct rgx_nfa){UNION, {b.start, NULL}});
b.out = append(b.out, &r->node[1]);
stack[++i] = (struct frag){r, b.out };
break;
}
else
{
a = stack[i--];
r = mknfa((struct rgx_nfa){UNION, {b.start, NULL}});
a.out = append(b.out, &patch(a.out, r)->node[1]);
stack[++i] = a;
break;
}
case '+':
// This implemention may cause a infinite loop that crashes.
a = stack[i--];
r = mknfa((struct rgx_nfa){UNION, {NULL, a.start}});
a.out = append(NULL, &patch(a.out, r)->node[0]);
stack[++i] = a;
break;
case '.':
a.start = mknfa((struct rgx_nfa){C_ANY, {NULL}});
a.out = append(NULL, &a.start->node[0]);
stack[++i] = a;
break;
default:
a.start = mknfa((struct rgx_nfa){RUNE, {NULL}, *s});
a.out = append(NULL, &a.start->node[0]);
stack[++i] = a;
break;
}
}
// Append Final
{
stack[++i].start = mknfa((struct rgx_nfa){FINAL, {NULL}, v});
stack[i].out = append(NULL, &stack[i].start->node[0]);
}
// Collapse Stack
while (i > 0)
{
struct frag to;
to = stack[i--];
patch(stack[i].out, to.start);
}
return stack[i].start;
}
/*
 * Match the whole string `s` against the automaton starting at `l`.
 *
 * l - entry node as returned by rgx_compile(); NULL never matches.
 * s - NUL-terminated subject string; NULL never matches.
 *
 * Returns the value stored in the FINAL node when that node is reached
 * with the input fully consumed, and 0 otherwise. A stored value of 0 is
 * therefore indistinguishable from "no match".
 *
 * UNION nodes are handled by backtracking: node[0] is tried through a
 * recursive call and, if that fails, matching continues with node[1].
 * A cycle of UNION nodes that consumes no input is not detected and makes
 * this function loop forever.
 */
int rgx_run(struct rgx_nfa *l, char *s)
{
    if (s == NULL) {
        return 0;
    }

    while (l) {
        switch (l->op) {
        case FINAL:
            return *s ? 0 : l->c;

        case C_ANY:
            if (!*(s++)) {
                return 0;
            }
            l = l->node[0];
            break;

        case RUNE:
            // Compare as unsigned char: `c` is unsigned, plain char may not be.
            if ((unsigned char)*(s++) != l->c) {
                return 0;
            }
            l = l->node[0];
            break;

        case UNION: {
            // Braces give the declaration a block; a label needs a statement.
            int res = rgx_run(l->node[0], s);

            if (res) {
                return res;
            }
            l = l->node[1];
            break;
        }

        default:
            // Unknown node kind: fail instead of spinning on the same node.
            return 0;
        }
    }
    return 0;
}

0
src/main.c Normal file
View File

9
src/main.h Normal file
View File

@@ -0,0 +1,9 @@
/*
 * One node of the compiled automaton.
 *
 * Enumerator and member order are relied upon by positional initializers,
 * so neither may be rearranged.
 */
struct rgx_nfa {
    enum {
        RUNE,  /* consume one character equal to `c`, continue at node[0] */
        C_ANY, /* consume any one character, continue at node[0] */
        UNION, /* branch: node[0] is tried first, then node[1] */
        FINAL  /* accept when the input is exhausted; `c` is the result */
    } op;
    struct rgx_nfa *node[2]; /* successors; unused entries are NULL */
    unsigned char c;         /* literal for RUNE, result value for FINAL */
};
/*
 * Build an automaton from the pattern string `s` and return its entry node.
 * `l` is not used by the definition in src/lexer.c.
 * `v` is stored in the accepting node's `unsigned char c` member, so it is
 * reduced to that type's range; it is the value rgx_run() returns on a match.
 */
struct rgx_nfa * rgx_compile(struct rgx_nfa * l, char * s, int v);
/*
 * Match the whole string `s` against the automaton starting at `l`.
 * Returns the accepting node's stored value when the entire string matches,
 * 0 otherwise (a NULL `l` yields 0).
 */
int rgx_run(struct rgx_nfa * l, char * s);

70
src/test.c Normal file
View File

@@ -0,0 +1,70 @@
#include <stdio.h>
#include "main.h"
// Table-driven test data: one pattern plus the strings to run against it.
struct test
{
// Pattern handed to rgx_compile().
char * regex;
// Sample strings; the list ends at the first entry whose `s` is NULL.
struct match
{
// Subject string handed to rgx_run().
char * s;
// Result rgx_run() is expected to return (main compiles with v = 1).
int expect;
} matches[0xff];
} test_suite[] =
{
// Literals and a single postfix operator
{"abc", {{"a", 0},{"abc", 1}, {"abcd",0 }, {NULL}}},
{"a.c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"aac", 1}, {NULL}}},
{"ab+c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 0}, {NULL}}},
{"ab*c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {NULL}}},
{"ab?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 0}, {"ac", 1}, {NULL}}},
// '.' on its own and combined with each postfix operator
{"a..c", {{"a", 0},{"abc", 0}, {"abcd",0 }, {"acc", 0}, {"ac", 0}, {"abbc", 1}, {NULL}}},
{"a.+c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"ac", 0}, {"abbc", 1}, {NULL}}},
{"a.*c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"ac", 1}, {"abbc", 1}, {NULL}}},
{"a.?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"ac", 1}, {NULL}}},
// Two different postfix operators stacked on the same fragment
{"ab+?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {NULL}}},
{"ab*?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {NULL}}},
{"ab?+c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"ababc", 1}, {"ac", 1}, {NULL}}},
{"ab?*c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"ababc", 1}, {"ac", 1}, {NULL}}},
// Alternation: disabled, rgx_compile() currently skips '|'
//{"a|b", {{"a", 1},{"abc", 0}, {"b", 1 }, {"ba", 0}, {NULL}}},
// The same postfix operator applied twice
{"ab??c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 0}, {"ac", 1}, {"c", 1}, {NULL}}},
{"ab++c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 0}, {"c", 0}, {"ababc",0}, {NULL}}},
{"ab**c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {"c", 1}, {"ababc",1}, {NULL}}},
// Postfix operator as the last character of the pattern
{"a?", {{"aaaa", 0}, {"a", 1}, {NULL}}},
{"a*", {{"aaaa", 1}, {"a", 1}, {NULL}}},
// Nested or adjacent repetition on long inputs (ReDoS-style patterns)
{"a+*", {{"aaaa", 1}, {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 1}, {NULL}}},
{"a*b?a*", {{"aaa", 1}, {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaba", 1}, {NULL}}},
{"aa*+", {{"aaaa", 1}, {NULL}}},
{"a*+", {{"a", 1}, {NULL}}},
} ;
int main(void)
{
int i=0, j=0;
for(i =0; i<sizeof(test_suite)/sizeof(*test_suite); ++i)
{
struct test t = test_suite[i];
struct rgx_nfa * nfa;
printf("\n\t%s\n", t.regex);
nfa = rgx_compile(NULL, t.regex, 1);
for( struct match * m = t.matches; m->s; ++m)
{
int res;
res = rgx_run(nfa, m->s);
printf("%s: %d %s\n", res == m->expect?"PASS":"FAIL", res, m->s);
}
}
}