First working regex engine
This commit is contained in:
15
Makefile
Normal file
15
Makefile
Normal file
@@ -0,0 +1,15 @@
|
||||
# Programs produced by this Makefile; each is linked from <name>.o plus $(OBJ).
BIN = src/main
TEST = src/test

# Objects shared by every program.
OBJ = \
	src/lexer.o

# Debug info has to be requested when compiling the objects as well,
# not only on the link line.
CFLAGS = -g

.PHONY: test

#all: $(BIN) $(TEST)

# Default goal: build the test driver.
test: $(TEST)

$(BIN): $(OBJ) $(BIN).o
	$(CC) $(CFLAGS) -o $(BIN) $(BIN).o $(OBJ)

$(TEST): $(OBJ) $(TEST).o
	$(CC) $(CFLAGS) -o $(TEST) $(TEST).o $(OBJ)

# lexer.c and test.c include main.h: rebuild their objects when it changes.
$(OBJ) $(TEST).o: src/main.h
|
||||
167
src/lexer.c
Normal file
167
src/lexer.c
Normal file
@@ -0,0 +1,167 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "main.h"
|
||||
|
||||
/*
 * Singly linked list of successor slots that have not been filled in yet.
 * Each entry remembers the address of a pointer inside an NFA node; patch()
 * later stores the real successor through that address.
 */
struct patch_list
{
    struct rgx_nfa **ptr;       /* address of the slot to overwrite */
    struct patch_list *next;    /* following entry, NULL at the tail */
};
|
||||
|
||||
/*
 * Partially built piece of the automaton kept on the compile stack:
 * its entry state plus the list of outgoing slots still to be patched.
 */
struct frag
{
    struct rgx_nfa *start;      /* first state of the fragment */
    struct patch_list *out;     /* dangling successor slots of the fragment */
};
|
||||
|
||||
struct patch_list * append(struct patch_list * pl, struct rgx_nfa ** nfa)
|
||||
{
|
||||
struct patch_list * res, * root = pl;
|
||||
res = malloc(sizeof(struct patch_list));
|
||||
*res = (struct patch_list){nfa, NULL};
|
||||
|
||||
if (pl == 0 ) {
|
||||
return res;
|
||||
}
|
||||
|
||||
while (pl->next) {
|
||||
pl = pl->next;
|
||||
}
|
||||
|
||||
pl->next = res;
|
||||
|
||||
return root;
|
||||
}
|
||||
|
||||
struct rgx_nfa * patch(struct patch_list *pl, struct rgx_nfa *to) {
|
||||
while (pl) {
|
||||
*pl->ptr = to;
|
||||
pl = pl->next;
|
||||
}
|
||||
return to;
|
||||
}
|
||||
|
||||
struct rgx_nfa * mknfa(struct rgx_nfa nfa)
|
||||
{
|
||||
struct rgx_nfa * res;
|
||||
res = malloc(sizeof(struct rgx_nfa));
|
||||
*res = nfa;
|
||||
return res;
|
||||
}
|
||||
|
||||
struct rgx_nfa * rgx_compile(struct rgx_nfa * l, char * s, int v) {
|
||||
struct rgx_nfa *r;
|
||||
unsigned char i=-1;
|
||||
struct frag stack[0xff], a, b;
|
||||
|
||||
for(;*s; ++s)
|
||||
{
|
||||
|
||||
switch(*s)
|
||||
{
|
||||
case '|':
|
||||
break;
|
||||
|
||||
case '*':
|
||||
b = stack[i--];
|
||||
|
||||
if (i==0xff)
|
||||
{
|
||||
r = mknfa((struct rgx_nfa){UNION, {b.start, NULL}});
|
||||
patch(b.out, r);
|
||||
stack[++i] = (struct frag){r, append(NULL, &r->node[1])};
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
a = stack[i--];
|
||||
r = mknfa((struct rgx_nfa){UNION, {b.start, NULL}});
|
||||
patch(a.out, r);
|
||||
patch(b.out, r);
|
||||
a.out = append(NULL, &r->node[1]);
|
||||
stack[++i] = a;
|
||||
break;
|
||||
}
|
||||
case '?':
|
||||
b = stack[i--];
|
||||
|
||||
if (i==0xff)
|
||||
{
|
||||
r = mknfa((struct rgx_nfa){UNION, {b.start, NULL}});
|
||||
b.out = append(b.out, &r->node[1]);
|
||||
stack[++i] = (struct frag){r, b.out };
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
a = stack[i--];
|
||||
r = mknfa((struct rgx_nfa){UNION, {b.start, NULL}});
|
||||
a.out = append(b.out, &patch(a.out, r)->node[1]);
|
||||
stack[++i] = a;
|
||||
break;
|
||||
}
|
||||
|
||||
case '+':
|
||||
// This implemention may cause a infinite loop that crashes.
|
||||
a = stack[i--];
|
||||
r = mknfa((struct rgx_nfa){UNION, {NULL, a.start}});
|
||||
a.out = append(NULL, &patch(a.out, r)->node[0]);
|
||||
stack[++i] = a;
|
||||
break;
|
||||
case '.':
|
||||
a.start = mknfa((struct rgx_nfa){C_ANY, {NULL}});
|
||||
a.out = append(NULL, &a.start->node[0]);
|
||||
stack[++i] = a;
|
||||
break;
|
||||
default:
|
||||
a.start = mknfa((struct rgx_nfa){RUNE, {NULL}, *s});
|
||||
a.out = append(NULL, &a.start->node[0]);
|
||||
stack[++i] = a;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Append Final
|
||||
{
|
||||
stack[++i].start = mknfa((struct rgx_nfa){FINAL, {NULL}, v});
|
||||
stack[i].out = append(NULL, &stack[i].start->node[0]);
|
||||
}
|
||||
|
||||
// Collapse Stack
|
||||
while (i > 0)
|
||||
{
|
||||
struct frag to;
|
||||
to = stack[i--];
|
||||
patch(stack[i].out, to.start);
|
||||
}
|
||||
|
||||
return stack[i].start;
|
||||
}
|
||||
|
||||
int rgx_run(struct rgx_nfa *l, char *s) {
|
||||
while (l)
|
||||
{
|
||||
switch (l->op)
|
||||
{
|
||||
case FINAL:
|
||||
return *s ? 0: l->c;
|
||||
case C_ANY:
|
||||
if (!*(s++))
|
||||
return 0;
|
||||
l = l->node[0];
|
||||
break;
|
||||
case RUNE:
|
||||
if (*(s++) != l->c)
|
||||
return 0;
|
||||
l = l->node[0];
|
||||
break;
|
||||
case UNION:
|
||||
int res = rgx_run(l->node[0], s);
|
||||
if (res)
|
||||
return res;
|
||||
l = l->node[1];
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
0
src/main.c
Normal file
0
src/main.c
Normal file
9
src/main.h
Normal file
9
src/main.h
Normal file
@@ -0,0 +1,9 @@
|
||||
#ifndef RGX_MAIN_H
#define RGX_MAIN_H

/* One state of a compiled pattern. */
struct rgx_nfa
{
    /*
     * RUNE   consume one byte equal to c, then go to node[0]
     * C_ANY  consume any one byte (not the terminating NUL), then node[0]
     * UNION  consume nothing: try node[0], fall back to node[1]
     * FINAL  accept if the input is exhausted; c is the match result
     */
    enum {RUNE, C_ANY, UNION, FINAL} op;
    struct rgx_nfa *node[2];    /* successor states */
    unsigned char c;            /* byte to match (RUNE) or result (FINAL) */
};

/* Compile pattern s; v (truncated to unsigned char) is the match result. */
struct rgx_nfa * rgx_compile(struct rgx_nfa * l, char * s, int v);

/* Match all of s against l; returns the match result, or 0 if no match. */
int rgx_run(struct rgx_nfa * l, char * s);

#endif /* RGX_MAIN_H */
|
||||
70
src/test.c
Normal file
70
src/test.c
Normal file
@@ -0,0 +1,70 @@
|
||||
#include <stdio.h>
|
||||
#include "main.h"
|
||||
|
||||
/* One subject string and the value rgx_run() is expected to return for it. */
struct match
{
    char * s;
    int expect;
};

/* A pattern with its subject strings; the list ends at the first NULL s. */
struct test
{
    char * regex;
    struct match matches[0xff];
};

struct test test_suite[] =
{
    // Literals and single quantifiers
    {"abc", {{"a", 0},{"abc", 1}, {"abcd",0 }, {NULL}}},
    {"a.c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"aac", 1}, {NULL}}},
    {"ab+c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 0}, {NULL}}},
    {"ab*c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {NULL}}},
    {"ab?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 0}, {"ac", 1}, {NULL}}},

    // '.' combined with quantifiers
    {"a..c", {{"a", 0},{"abc", 0}, {"abcd",0 }, {"acc", 0}, {"ac", 0}, {"abbc", 1}, {NULL}}},
    {"a.+c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"ac", 0}, {"abbc", 1}, {NULL}}},
    {"a.*c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"ac", 1}, {"abbc", 1}, {NULL}}},
    {"a.?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"ac", 1}, {NULL}}},

    // Quantifier pairs expected to behave like '*'
    {"ab+?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {NULL}}},
    {"ab*?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {NULL}}},
    {"ab?+c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"ababc", 1}, {"ac", 1}, {NULL}}},
    {"ab?*c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"ababc", 1}, {"ac", 1}, {NULL}}},

    // Alternation (disabled)
    //{"a|b", {{"a", 1},{"abc", 0}, {"b", 1 }, {"ba", 0}, {NULL}}},

    // Doubled quantifiers
    {"ab??c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 0}, {"ac", 1}, {"c", 1}, {NULL}}},
    {"ab++c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 0}, {"c", 0}, {"ababc",0}, {NULL}}},
    {"ab**c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {"c", 1}, {"ababc",1}, {NULL}}},

    // Quantifier on the only atom of the pattern
    {"a?", {{"aaaa", 0}, {"a", 1}, {NULL}}},
    {"a*", {{"aaaa", 1}, {"a", 1}, {NULL}}},

    // Patterns prone to heavy backtracking (ReDoS)
    {"a+*", {{"aaaa", 1}, {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 1}, {NULL}}},
    {"a*b?a*", {{"aaa", 1}, {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaba", 1}, {NULL}}},
    {"aa*+", {{"aaaa", 1}, {NULL}}},
    {"a*+", {{"a", 1}, {NULL}}},
} ;
|
||||
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int i=0, j=0;
|
||||
|
||||
for(i =0; i<sizeof(test_suite)/sizeof(*test_suite); ++i)
|
||||
{
|
||||
struct test t = test_suite[i];
|
||||
struct rgx_nfa * nfa;
|
||||
|
||||
printf("\n\t%s\n", t.regex);
|
||||
nfa = rgx_compile(NULL, t.regex, 1);
|
||||
|
||||
for( struct match * m = t.matches; m->s; ++m)
|
||||
{
|
||||
int res;
|
||||
res = rgx_run(nfa, m->s);
|
||||
printf("%s: %d %s\n", res == m->expect?"PASS":"FAIL", res, m->s);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user