Statically check for and remove epsilon loops

This commit is contained in:
PedroEdiaz
2025-12-26 12:29:26 -06:00
parent e4f67ab3fe
commit a15e1c212a
7 changed files with 866 additions and 277 deletions

15
.clang-format Normal file
View File

@@ -0,0 +1,15 @@
# Tabs
UseTab: ForContinuationAndIndentation #ForIndentation
# Sizes
TabWidth: 8
IndentWidth: 8
ContinuationIndentWidth: 8
# Column Limit
ColumnLimit: 80
# Functions
AllowAllArgumentsOnNextLine: false
# Allman
BreakBeforeBraces: Allman

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
src/*.o
src/test

View File

@@ -1,15 +1,25 @@
BIN = src/main
TEST = src/test TEST = src/test
DOC = doc/main.pdf
OBJ = \ OBJ = \
src/lexer.o src/lexer.o
#all: $(BIN) $(TEST) all: $(BIN) $(TEST) $(DOC)
test: $(TEST) test: $(TEST)
$(DOC): doc/main.typst
typst compile $< $@
$(BIN): $(OBJ) $(BIN).o $(BIN): $(OBJ) $(BIN).o
$(CC) -g -o $(BIN) $(BIN).o $(OBJ) $(CC) -g -o $(BIN) $(BIN).o $(OBJ)
$(TEST): $(OBJ) $(TEST).o $(TEST): $(OBJ) $(TEST).o
$(CC) -g -o $(TEST) $(TEST).o $(OBJ) $(CC) -g -o $(TEST) $(TEST).o $(OBJ)
# fmt and clean are commands, not files: keep make from being confused by a
# stray file named "fmt" or "clean".
.PHONY: fmt clean
# Reformat every C source and header under src/ in place.
fmt:
	clang-format -i src/*.c src/*.h
# Remove build outputs. -f keeps the target from failing when some of them
# have not been built yet; the entry-point objects and the test binary are
# removed as well so a following build starts from scratch.
clean:
	rm -f $(OBJ) $(BIN) $(BIN).o $(TEST) $(TEST).o

View File

@@ -1,147 +1,279 @@
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h> #include <ctype.h>
#include "main.h" #include <stdlib.h>
#include <string.h>
struct patch_list #define DEBUG 0
#ifdef DEBUG
#include <stdio.h>
#endif
struct ll
{ {
struct rgx_nfa **ptr; void *p;
struct patch_list *next; struct ll *next;
}; };
struct frag struct frag
{ {
struct rgx_nfa *start; enum
struct patch_list *pl;
};
struct patch_list * pl_append(struct patch_list * pl, struct rgx_nfa ** nfa)
{
struct patch_list * res, * root = pl;
res = malloc(sizeof(struct patch_list));
*res = (struct patch_list){nfa, NULL};
if (pl == 0 ) {
return res;
}
while (pl->next) {
pl = pl->next;
}
pl->next = res;
return root;
}
void pl_merge(struct patch_list ** pl1, struct patch_list * pl2)
{
struct patch_list * pl = *pl1;
if (!pl ) {
*pl1 = pl2;
return;
}
while (pl->next) {
pl = pl->next;
}
pl->next = pl2;
}
struct rgx_nfa * patch(struct patch_list *pl, struct rgx_nfa *to)
{
while (pl) {
*pl->ptr = to;
pl = pl->next;
}
return to;
}
void collapse(struct frag * stack, unsigned char * i)
{
if (stack[*i].start->op == BLOCK ) {
stack[(*i)--] ;
return;
}
while (*i > 0 )
{ {
struct frag to; RUNE,
CLASS,
NOT,
ANY,
FINAL,
UNION,
BLOCK
} type;
union
{
char rune;
char class;
char final;
struct frag *try;
struct frag * not ;
struct ll *ul;
} as;
struct frag *next;
struct ll *pl;
};
to = stack[(*i)--]; struct ll *ll(void *p)
if (stack[*i].start->op == BLOCK ) {
stack[*i]= to;
return;
}
patch(stack[*i].pl, to.start);
stack[*i].pl = to.pl;
}
}
struct rgx_nfa * mknfa(struct rgx_nfa nfa)
{ {
struct rgx_nfa * res; struct ll *res;
res = malloc(sizeof(struct rgx_nfa)); res = malloc(sizeof(struct ll));
*res = nfa; *res = (struct ll){.p = p, .next = NULL};
return res; return res;
} }
struct rgx_nfa * rgx_compile(struct rgx_nfa * l, char * s, int v) { void transfer(struct ll **dest, struct ll **src)
unsigned char i=-1; {
struct frag stack[0xff]; if (!(*dest))
struct patch_list * union_pl = NULL; {
*dest = *src;
if (l) { *src = NULL;
struct rgx_nfa *r; return;
r = mknfa((struct rgx_nfa){UNION, {l, NULL}, 1});
stack[++i] = (struct frag){r, pl_append(NULL, &r->node[1])};
} }
for(;*s; ++s) while ((*dest)->next)
{ dest = &((*dest)->next);
struct rgx_nfa *r;
struct frag a, b;
switch(*s) (*dest)->next = *src;
*src = NULL;
}
/*
 * patch - attach `src` to every open end recorded in dest->pl.
 *
 * Each node of dest->pl carries a pointer `p`, interpreted by inspecting the
 * pointer-sized word it addresses:
 *
 *  - word is zero: `p` is an unset `struct frag *` slot (the fragment
 *    constructors in this file register `&frag->next`). The slot is pointed
 *    at `src` and the list node is released.
 *  - word is non-zero: `p` is treated as a UNION fragment (alt() registers
 *    the fragment itself) and `src` is added at the tail of its as.ul list:
 *      . src is not a UNION           -> one new node referring to src;
 *      . src is a UNION, dest is not  -> src's own list is spliced in
 *                                        (shared, not copied);
 *      . both are UNIONs              -> src's alternatives are copied one by
 *                                        one; an alternative equal to `dest`
 *                                        is skipped and flagged as an
 *                                        epsilon loop.
 *
 * Afterwards dest takes over src's patch list: it replaces dest->pl, or, when
 * an epsilon loop was seen, it is linked behind the first node of dest->pl.
 * src->pl is always reset to NULL.
 *
 * dest: fragment whose open ends are being closed.
 * src:  fragment that follows dest.
 */
void patch(struct frag *dest, struct frag *src)
{
	struct ll *pl = dest->pl;
	int epsilon_loop = 0;

	while (pl)
	{
		/* Read the successor first: the node may be freed below. */
		struct ll *next = pl->next;
		struct ll *d_ul, *s_ul;

		// Non Union pl
		if (!*(void **)pl->p)
		{
			*(struct frag **)pl->p = src;
			free(pl);
			pl = next;
			continue;
		}

		// Prepare Ul to patch
		d_ul = ((struct frag *)pl->p)->as.ul;
		while (d_ul->next)
			d_ul = d_ul->next;

		// Patch
		if (src->type != UNION)
		{
			d_ul->next = ll(src);
			pl = next;
			continue;
		}

		// Merge Naive
		if (dest->type != UNION)
		{
			// TODO: Check if src must be free;
			d_ul->next = src->as.ul;
			pl = next;
			continue;
		}

		// Merge Clone
		for (s_ul = src->as.ul; s_ul; s_ul = s_ul->next)
		{
			if (s_ul->p != dest)
				d_ul = d_ul->next = ll(s_ul->p);
			else
				epsilon_loop = 1;
		}

		// if no epsilon loop, we should merge naive
		pl = next;
	}

	if (epsilon_loop)
	{
		/*
		 * NOTE(review): this assumes the head of dest->pl was a UNION
		 * entry and therefore not freed above - confirm that a slot
		 * entry can never precede the looping UNION entry.
		 */
		dest->pl->next = src->pl;
	}
	else
	{
		dest->pl = src->pl;
	}
	src->pl = NULL;
}
/*
 * collapse - fold the fragments on top of the stack into a single fragment.
 *
 * stack: fragment stack.
 * i:     in/out index of the top element; 0xff is the value rgx_compile uses
 *        for "stack is empty".
 *
 * If the top element is a BLOCK marker it is popped and nothing else happens.
 * Otherwise the top element is repeatedly popped and patched onto the element
 * beneath it until either index 0 is reached or the element beneath is a
 * BLOCK marker, in which case the marker's slot is taken over by the folded
 * fragment.
 *
 * BLOCK markers discarded here are freed; they are only ever referenced from
 * the stack, and nothing in this function patches them into another fragment.
 */
void collapse(struct frag **stack, unsigned char *i)
{
	/* Empty stack: there is no element to inspect or fold. */
	if (*i == 0xff)
		return;

	if (stack[*i]->type == BLOCK)
	{
		free(stack[(*i)--]);
		return;
	}

	while (*i > 0)
	{
		struct frag *to = stack[(*i)--];

		if (stack[*i]->type == BLOCK)
		{
			/* Replace the marker with the folded fragment. */
			free(stack[*i]);
			stack[*i] = to;
			return;
		}
		patch(stack[*i], to);
	}
}
/* Frag Helpers */
/*
 * final - allocate a FINAL fragment.
 *
 * final: value stored in as.final.
 *
 * A FINAL fragment has no successor and no open ends, so `next` and `pl` are
 * set to NULL. patch() reads src->pl of the fragment it is given, so the
 * field must not be left indeterminate.
 */
struct frag *final(char final)
{
	struct frag *res;

	res = malloc(sizeof(struct frag));
	res->type = FINAL;
	res->as.final = final;
	res->next = NULL;
	res->pl = NULL;
	return res;
}
/*
 * rune - allocate a RUNE fragment for one literal character.
 *
 * rune: character stored in as.rune.
 *
 * The new fragment has no successor yet; its `next` slot is registered as the
 * single entry of its patch list.
 */
struct frag *rune(char rune)
{
	struct frag *node = malloc(sizeof(struct frag));

	node->next = NULL;
	node->pl = ll(&node->next);
	node->as.rune = rune;
	node->type = RUNE;
	return node;
}
/*
 * any - allocate an ANY fragment.
 *
 * The new fragment has no successor yet; its `next` slot is registered as the
 * single entry of its patch list. Declared with a (void) prototype, like
 * block(), so calls with stray arguments are rejected by the compiler.
 */
struct frag *any(void)
{
	struct frag *res;

	res = malloc(sizeof(struct frag));
	res->type = ANY;
	res->next = NULL;
	res->pl = ll(&res->next);
	return res;
}
/*
 * alt - allocate a UNION fragment whose only alternative is `frag`.
 *
 * frag: first alternative, stored as the single node of as.ul.
 *
 * The fragment registers itself (not a `next` slot) in its patch list, so
 * patch() extends the alternative list rather than filling a pointer. `next`
 * is set to NULL so no field is left indeterminate, as in the other
 * constructors.
 */
struct frag *alt(struct frag *frag)
{
	struct frag *res;

	res = malloc(sizeof(struct frag));
	res->type = UNION;
	res->as.ul = ll(frag);
	res->next = NULL;
	res->pl = ll(res);
	return res;
}
/*
 * not - allocate a NOT fragment wrapping `frag`.
 *
 * frag: fragment stored in as.try.
 *
 * The new fragment has no successor yet; its `next` slot is registered as the
 * single entry of its patch list.
 */
struct frag *not(struct frag *frag)
{
	struct frag *node = malloc(sizeof(struct frag));

	node->next = NULL;
	node->pl = ll(&node->next);
	node->as.try = frag;
	node->type = NOT;
	return node;
}
/*
 * class - allocate a CLASS fragment.
 *
 * class: class selector character stored in as.class.
 *
 * The new fragment has no successor yet; its `next` slot is registered as the
 * single entry of its patch list.
 */
struct frag *class(char class)
{
	struct frag *node = malloc(sizeof(struct frag));

	node->next = NULL;
	node->pl = ll(&node->next);
	node->as.class = class;
	node->type = CLASS;
	return node;
}
/*
 * block - allocate a BLOCK marker fragment.
 *
 * collapse() stops folding when it meets a BLOCK. The marker has no successor
 * and no open ends, so `next` and `pl` are set to NULL: a marker that is
 * still on the stack can be handed to code that reads ->pl, which must not
 * see an indeterminate pointer.
 */
struct frag *block(void)
{
	struct frag *res;

	res = malloc(sizeof(struct frag));
	res->type = BLOCK;
	res->next = NULL;
	res->pl = NULL;
	return res;
}
void *rgx_compile(void *l, char *s, int v)
{
unsigned char i = 0xff, j = 0;
struct frag *stack[0xff];
struct ll *ul[0xff] = {NULL};
for (; *s; ++s)
{
struct frag *r;
switch (*s)
{ {
case '(': case '(':
r = mknfa((struct rgx_nfa){BLOCK, {NULL}, 0}); stack[++i] = block();
stack[++i] = (struct frag){r, NULL }; ul[++j] = NULL;
break; break;
case ')': case ')':
collapse(stack, &i); collapse(stack, &i);
pl_merge(&stack[i].pl, union_pl);
union_pl=NULL; if (j != 0xff)
transfer(&stack[i]->pl, &ul[j--]);
break; break;
case '|': case '|':
collapse(stack, &i); collapse(stack, &i);
pl_merge(&union_pl, stack[i].pl); transfer(&ul[j], &stack[i]->pl);
stack[i] = alt(stack[i]);
if (i!= 0) { if (i != 0xff)
{
struct frag *a;
a = stack[i--]; a = stack[i--];
r = mknfa((struct rgx_nfa){BLOCK, {NULL}, 0}); stack[++i] = block();
stack[++i] = (struct frag){r, NULL }; stack[++i] = a;
stack[++i]= a;
} }
r = mknfa((struct rgx_nfa){UNION, {stack[i].start, NULL}, 0});
stack[i] = (struct frag){r, pl_append(NULL, &r->node[1])};
break; break;
case '\\': case '\\':
switch(*(++s)) switch (*(++s))
{ {
case 's': case 's':
case 'd': case 'd':
case 'w': case 'w':
a.start = mknfa((struct rgx_nfa){CLASS, {NULL}, *s}); stack[++i] = class(*s);
a.pl = pl_append(NULL, &a.start->node[0]);
stack[++i] = a;
break; break;
case '\\': case '\\':
case '|': case '|':
@@ -150,112 +282,183 @@ struct rgx_nfa * rgx_compile(struct rgx_nfa * l, char * s, int v) {
case '*': case '*':
case '+': case '+':
case '?': case '?':
a.start = mknfa((struct rgx_nfa){RUNE, {NULL}, *s}); case '\'':
a.pl = pl_append(NULL, &a.start->node[0]); stack[++i] = rune(*s);
stack[++i] = a;
break; break;
default: default:
a.start = mknfa((struct rgx_nfa){RUNE, {NULL}, '\\'}); stack[++i] = rune('\\');
a.pl = pl_append(NULL, &a.start->node[0]); stack[++i] = rune(*s);
stack[++i] = a;
a.start = mknfa((struct rgx_nfa){RUNE, {NULL}, *s});
a.pl = pl_append(NULL, &a.start->node[0]);
stack[++i] = a;
break; break;
} }
break; break;
case '*': case '\'':
r = mknfa((struct rgx_nfa){UNION, {stack[i].start, NULL}, 0}); if (i == 0xff)
patch(stack[i].pl, r); {
stack[i] = (struct frag){r, pl_append(NULL, &r->node[1])}; stack[++i] = rune(*s);
break;
}
patch(stack[i], final(0));
stack[i] = not(stack[i]);
break;
case '*':
if (i == 0xff)
{
stack[++i] = rune(*s);
break;
}
r = alt(stack[i]);
patch(stack[i], r);
r->pl = stack[i]->pl;
stack[i] = r;
break;
case '?':
if (i == 0xff)
{
stack[++i] = rune(*s);
break;
}
r = alt(stack[i]);
transfer(&r->pl, &stack[i]->pl);
stack[i] = r;
break; break;
case '?':
r = mknfa((struct rgx_nfa){UNION, {stack[i].start, NULL}, 0});
stack[i] = (struct frag){r, pl_append(stack[i].pl, &r->node[1]) };
break;
case '+': case '+':
r = mknfa((struct rgx_nfa){UNION, {stack[i].start, NULL}, 0}); if (i == 0xff)
stack[i].pl = pl_append(NULL, &patch(stack[i].pl, r)->node[1]); {
stack[++i] = rune(*s);
break;
}
r = alt(stack[i]);
patch(stack[i], r);
break; break;
case '.': case '.':
a.start = mknfa((struct rgx_nfa){ANY, {NULL}}); stack[++i] = any();
a.pl = pl_append(NULL, &a.start->node[0]);
stack[++i] = a;
break; break;
default: default:
a.start = mknfa((struct rgx_nfa){RUNE, {NULL}, *s}); stack[++i] = rune(*s);
a.pl = pl_append(NULL, &a.start->node[0]);
stack[++i] = a;
break; break;
} }
} }
// Collapse and add FINAL state // Collapse and add FINAL state
while ( i!=0 ) { while (i)
if (stack[i].start->op == BLOCK ) {
stack[i--];
collapse(stack, &i); collapse(stack, &i);
if (j != 0xff)
transfer(&stack[i]->pl, &ul[j--]);
} }
pl_merge(&stack[i].pl, union_pl); patch(stack[i], final(v));
union_pl = NULL;
patch(stack[i].pl, mknfa((struct rgx_nfa){FINAL, {NULL}, v})); // Merge w/ l
return stack[i].start; if (l)
patch(stack[i], alt(l));
return stack[i];
} }
int rgx_run(struct rgx_nfa *l, char *s) { int rgx_run(void *p, char *s)
{
struct frag *l = p;
while (l) while (l)
{ {
switch (l->op) switch (l->type)
{ {
case FINAL: case NOT:
//printf("FINAL: %c\n", *s? *s:'0'); #ifdef DEBUG
return *s ? 0: l->c; printf("[%p] NOT\n", l);
case ANY: #endif
//printf("ANY: %c\n", *s? *s:'0'); if (rgx_run(l->as.try, s) != 0)
if (!*(s++))
return 0; return 0;
l = l->node[0];
if (!*(s++))
return 0;
l = l->next;
break;
case FINAL:
#ifdef DEBUG
printf("[%p] FINAL: %c\n", l, *s ? *s : '0');
#endif
if (l->as.final == 0)
return 1;
return *s ? 0 : l->as.final;
case ANY:
#ifdef DEBUG
printf("[%p] ANY: %c\n", l, *s ? *s : '0');
#endif
if (!*(s++))
return 0;
l = l->next;
break; break;
case CLASS: case CLASS:
//printf("CLASS(%c): %c\n", l->c, *s? *s: '0'); #ifdef DEBUG
switch(l->c) printf("[%p] CLASS(%c): %c\n", l, l->as.class,
*s ? *s : '0');
#endif
switch (l->as.class)
{ {
case 's': case 's':
if(!isspace(*(s++))) if (!isspace(*(s++)))
return 0; return 0;
l = l->node[0]; l = l->next;
break; break;
case 'd': case 'd':
if(!isdigit(*(s++))) if (!isdigit(*(s++)))
return 0; return 0;
l = l->node[0]; l = l->next;
break; break;
case 'w': case 'w':
if(!isalpha(*(s++))) if (!isalpha(*(s++)))
return 0; return 0;
l = l->node[0]; l = l->next;
break; break;
default: default:
return 0; return 0;
} }
break; break;
case RUNE: case RUNE:
//printf("RUNE(%c): %c\n", l->c, *s?*s:'0'); #ifdef DEBUG
if (*(s++) != l->c) printf("[%p] RUNE(%c): %c\n", l, l->as.rune,
*s ? *s : '0');
#endif
if (*(s++) != l->as.rune)
return 0; return 0;
l = l->node[0]; l = l->next;
break; break;
case UNION: case UNION:
//printf("{\n"); {
int res = rgx_run(l->node[0], s); int res;
//printf("}\n"); struct ll *ul = l->as.ul;
if (res) #ifdef DEBUG
return res; printf("[%p] UNION\n", l);
l = l->node[1]; #endif
break; while (ul)
{
res = rgx_run(ul->p, s);
if (res != 0)
return res;
ul = ul->next;
}
#ifdef DEBUG
printf("[%p] END\n", l);
#endif
return 0;
}
default:
#ifdef DEBUG
printf("[%p]\n", l);
#endif
return 0;
} }
} }
#ifdef DEBUG
printf("[%p]\n", l);
#endif
return 0; return 0;
} }

View File

@@ -1,34 +0,0 @@
#include <stdio.h>
#include "main.h"
/*
 * NULL-terminated table of regex patterns. main() below passes tokens[0] to
 * rgx_compile(). The trailing comments show each pattern after C string
 * unescaping.
 */
char * tokens[] =
{
"\"(\\\"|\"!)*\"", /* "(\"|"!)*" */
"\\d+", /* \d+ */
"L|R|N",
"\\w+", /* \w+ */
"\\s+", /* \s+ */
"[\\s*", /* [\s* */
"\\s*]", /* \s*] */
"\\s*:\\s*", /* \s*:\s* */
"\\s*\\|\\s*", /* \s*\|\s* */
"\\s*<\\s*", /* \s*<\s* */
"\\s*;", /* \s*; */
NULL,
};
/*
 * Compile the first pattern of `tokens` and print the result of running it
 * against a handful of sample inputs.
 *
 * Only tokens[0] is compiled; chaining the remaining patterns through the
 * first argument of rgx_compile() is not enabled here.
 */
int main(void)
{
	struct rgx_nfa *l = rgx_compile(NULL, tokens[0], 1);

	printf("\n%d\n", rgx_run(l, "\"\""));
	printf("\n%d\n", rgx_run(l, "\"b\""));
	printf("\n%d\n", rgx_run(l, "\"bb\""));
	printf("\n%d\n", rgx_run(l, "\"\"\""));
	printf("\n%d\n", rgx_run(l, "\"\\\"\""));
	return 0;
}

View File

@@ -1,9 +1,2 @@
struct rgx_nfa void *rgx_compile(void *l, char *s, int v);
{ int rgx_run(void *l, char *s);
enum {RUNE, ANY, UNION, FINAL, CLASS, BLOCK} op;
struct rgx_nfa *node[2];
unsigned char c;
};
struct rgx_nfa * rgx_compile(struct rgx_nfa * l, char * s, int v);
int rgx_run(struct rgx_nfa * l, char * s);

View File

@@ -1,94 +1,494 @@
#include <stdio.h>
#include "main.h" #include "main.h"
#include <stdio.h>
struct test struct test
{ {
char * regex; char *regex;
struct match struct match
{ {
char * s; char *s;
int expect; int expect;
} matches[0xff]; } matches[0xff];
} test_suite[] = } test_suite[] = {
{
// Basic // Basic
{"abc", {{"a", 0},{"abc", 1}, {"abcd",0 }, {NULL}}}, {
{"a.c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"aac", 1}, {NULL}}}, "abc",
{"ab+c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 0}, {NULL}}}, {
{"ab*c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {NULL}}}, {"a", 0},
{"ab?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 0}, {"ac", 1}, {NULL}}}, {"abc", 1},
{"abcd", 0},
{NULL},
},
},
{
"a.c",
{
{"a", 0},
{"abc", 1},
{"abcd", 0},
{"acc", 1},
{"aac", 1},
{NULL},
},
},
{
"ab*c",
{
{"a", 0},
{"abc", 1},
{"abcd", 0},
{"abbc", 1},
{"ac", 1},
{"ab", 0},
{NULL},
},
},
{
"ab+c",
{
{"a", 0},
{"abc", 1},
{"abcd", 0},
{"abbc", 1},
{"ac", 0},
{NULL},
},
},
{
"ab?c",
{
{"a", 0},
{"abc", 1},
{"abcd", 0},
{"abbc", 0},
{"ac", 1},
{NULL},
},
},
// . Compose // . Compose
{"a..c", {{"a", 0},{"abc", 0}, {"abcd",0 }, {"acc", 0}, {"ac", 0}, {"abbc", 1}, {NULL}}}, {
{"a.+c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"ac", 0}, {"abbc", 1}, {NULL}}}, "a..c",
{"a.*c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"ac", 1}, {"abbc", 1}, {NULL}}}, {
{"a.?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"acc", 1}, {"ac", 1}, {NULL}}}, {"a", 0},
{"abc", 0},
// TODO: Implement NFA to DFA to avoid loops {"abcd", 0},
{"acc", 0},
{"ac", 0},
{"abbc", 1},
{NULL},
},
},
{
"a.+c",
{
{"a", 0},
{"abc", 1},
{"abcd", 0},
{"acc", 1},
{"ac", 0},
{"abbc", 1},
{NULL},
},
},
{
"a.*c",
{
{"a", 0},
{"abc", 1},
{"abcd", 0},
{"acc", 1},
{"ac", 1},
{"abbc", 1},
{NULL},
},
},
{
"a.?c",
{
{"a", 0},
{"abc", 1},
{"abcd", 0},
{"acc", 1},
{"ac", 1},
{NULL},
},
},
// Equal to * // Equal to *
{"ab+?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {NULL}}}, {
{"ab*?c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {NULL}}}, "ab+?c",
//{"ab?+c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"ababc", 1}, {"ac", 1}, {NULL}}}, {
//{"ab?*c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"ababc", 1}, {"ac", 1}, {NULL}}}, {"a", 0},
{"a+*", {{"aaaa", 1}, {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 1}, {NULL}}}, {"abc", 1},
//{"a**", {{"aaaa", 1}, {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 1}, {NULL}}}, {"abcd", 0},
{"abbc", 1},
{"ac", 1},
{"ab", 0},
{NULL},
},
},
{
"ab*?c",
{
{"a", 0},
{"abc", 1},
{"abcd", 0},
{"abbc", 1},
{"ac", 1},
{"ab", 0},
{NULL},
},
},
// Nilpotent {
{"ab??c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 0}, {"ac", 1}, {"c", 0}, {NULL}}}, "ab+*c",
{"ab++c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 0}, {"c", 0}, {"ababc",0}, {NULL}}}, {
//{"ab**c", {{"a", 0},{"abc", 1}, {"abcd",0 }, {"abbc", 1}, {"ac", 1}, {"c", 1}, {"ababc",1}, {NULL}}}, {"a", 0},
{"abc", 1},
{"abcd", 0},
{"abbc", 1},
{"ac", 1},
{"ab", 0},
{NULL},
},
},
// Epsilon-Loops
{
"ab?+c",
{
{"a", 0},
{"abc", 1},
{"abcd", 0},
{"abbc", 1},
{"ac", 1},
{"ab", 0},
{NULL},
},
},
{
"ab?*c",
{
{"a", 0},
{"abc", 1},
{"abcd", 0},
{"abbc", 1},
{"ac", 1},
{"ab", 0},
{NULL},
},
},
// Idempotent operators (x** == x*, x++ == x+, x?? == x?)
{
"ab**c",
{
{"a", 0},
{"abc", 1},
{"abcd", 0},
{"abbc", 1},
{"ac", 1},
{"ab", 0},
{NULL},
},
},
{
"ab++c",
{
{"a", 0},
{"abc", 1},
{"abcd", 0},
{"abbc", 1},
{"ac", 0},
{NULL},
},
},
{
"ab??c",
{
{"a", 0},
{"abc", 1},
{"abcd", 0},
{"abbc", 0},
{"ac", 1},
{NULL},
},
},
// UNION // UNION
{"a()", {{"a", 1},{"abc", 0}, {"b", 0 }, {"ba", 0}, {NULL}}}, {
{"a(b)", {{"ab", 1},{"abc", 0}, {"b", 0 }, {"ba", 0}, {NULL}}}, "a()",
{"a(b)*", {{"ab", 1},{"abb", 1}, {"a", 1 }, {"ba", 0}, {NULL}}}, {
{"a|b", {{"a", 1},{"abc", 0}, {"b", 1 }, {"ba", 0}, {NULL}}}, {"a", 1},
{"(a|.)*", {{"a", 1},{"abc", 1}, {"b", 1 }, {"ba", 1}, {NULL}}}, {"abc", 0},
{"(.|a)*", {{"a", 1},{"abc", 1}, {"b", 1 }, {"ba", 1}, {NULL}}}, {"b", 0},
{"(b|a)*", {{"a", 1},{"cab", 0}, {"b", 1 }, {"ab", 1}, {"cacb", 0}, {NULL}}}, {"ba", 0},
{"c(b|a)*", {{"ca", 1},{"cab", 1}, {"b", 0 }, {"bb", 0}, {"cacb", 0}, {NULL}}}, {NULL},
{"ab|b", {{"ab", 1}, {"b", 1}, {"abb", 0}, {NULL}}}, },
{"a|b|c", {{"a", 1}, {"b", 1}, {"c", 1},{"d",0}, {NULL}}}, },
{"((a|b)|c)d", {{"ad", 1}, {"bd", 1}, {"cd", 1},{"a",0}, {NULL}}}, {
"a(b)",
{
{"ab", 1},
{"abc", 0},
{"b", 0},
{"ba", 0},
{NULL},
},
},
{
"a(b)*",
{
{"ab", 1},
{"abb", 1},
{"a", 1},
{"ba", 0},
{NULL},
},
},
{
"a|b",
{
{"a", 1},
{"abc", 0},
{"b", 1},
{"ba", 0},
{NULL},
},
},
{
"a|b*",
{
{"a", 1},
{"abc", 0},
{"b", 1},
{"bb", 1},
{NULL},
},
},
{
"(a|.)*",
{
{"a", 1},
{"abc", 1},
{"b", 1},
{"ba", 1},
{NULL},
},
},
{
"(.|a)*",
{
{"a", 1},
{"abc", 1},
{"b", 1},
{"ba", 1},
{NULL},
},
},
{
"c(b|a)*",
{
{"ca", 1},
{"cab", 1},
{"b", 0},
{"bb", 0},
{"cacb", 0},
{NULL},
},
},
{
"ab|b",
{
{"ab", 1},
{"b", 1},
{"abb", 0},
{NULL},
},
},
{
"a|b|c",
{
{"a", 1},
{"b", 1},
{"c", 1},
{"d", 0},
{NULL},
},
},
{
"((a|b)|c)d",
{
{"ad", 1},
{"bd", 1},
{"cd", 1},
{"a", 0},
{NULL},
},
},
// BLOCK // BLOCK
{"(abb", {{"abb", 1}, {"ab", 0}, {"b", 0}, {NULL}}}, {
{"abb)", {{"abb", 1}, {"ab", 0}, {"b", 0}, {NULL}}}, "(abb",
{"a(bb", {{"abb", 1}, {"ab", 0}, {"b", 0}, {NULL}}}, {
{"ab)b", {{"abb", 1}, {"ab", 0}, {"b", 0}, {NULL}}}, {"abb", 1},
{"(ab)b", {{"abb", 1}, {"ab", 0}, {"b", 0}, {NULL}}}, {"ab", 0},
{"(ab)+", {{"ab", 1}, {"abab", 1}, {"b", 0}, {NULL}}}, {"b", 0},
{"(a(bc)*)", {{"a", 1}, {"abc", 1}, {"abb", 0}, {NULL}}}, {NULL},
{"(ab|cd)", {{"ab", 1}, {"cd", 1}, {"abcd", 0}, {NULL}}}, },
},
// REDOS {
{"a*b?a*", {{"aaa", 1}, {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaba", 1}, {NULL}}}, "abb)",
} ; {
{"abb", 1},
{"ab", 0},
{"b", 0},
{NULL},
},
},
{
"a(bb",
{
{"abb", 1},
{"ab", 0},
{"b", 0},
{NULL},
},
},
{
"ab)b",
{
{"abb", 1},
{"ab", 0},
{"b", 0},
{NULL},
},
},
{
"(ab)b",
{
{"abb", 1},
{"ab", 0},
{"b", 0},
{NULL},
},
},
{
"(ab)+",
{
{"ab", 1},
{"abab", 1},
{"b", 0},
{NULL},
},
},
{
"(a(bc)*)",
{
{"a", 1},
{"abc", 1},
{"abb", 0},
{NULL},
},
},
{
"(ab|cd)",
{
{"ab", 1},
{"cd", 1},
{"abcd", 0},
{NULL},
},
},
// NEG
{
"a'",
{
{"a", 0},
{"b", 1},
{NULL},
},
},
{
"a''",
{
{"a", 1},
{"b", 0},
{NULL},
},
},
{
"a|b'",
{
{"a", 1},
{"b", 0},
{"c", 1},
{NULL},
},
},
{
"{({|})'*}",
{
{"{}", 1},
{"{{}", 0},
{"{}}", 0},
{NULL},
},
},
{
"\"(\\\\.|(\"|\\\\)')*\"",
{
{"\"\"", 1},
{"\"\"\"", 0},
{"\"\\\"", 0},
{"\"lsk\\\"lsdk\"", 1},
{NULL},
},
},
{
"\"((\"|\\\\)'|\\\\.)*\"",
{
{"\"\"", 1},
{"\"\"\"", 0},
{"\"\\\"", 0},
{"\"lsk\\\"lsdk\"", 1},
{NULL},
},
},
{
"\\\\\\*(\\*\\\\)'*)\\*\\\\",
{
{"\\*lskd*\\", 1},
{"\\****\\", 1},
{"\\*\\\\*\\", 1},
{"\\*ls*\\ lsdk *\\", 0},
{NULL},
},
},
};
int main(void) int main(void)
{ {
int i=0, j=0; int i = 0;
for(i =0; i<sizeof(test_suite)/sizeof(*test_suite); ++i) for (i = 0; i < sizeof(test_suite) / sizeof(*test_suite); ++i)
{ {
struct test t = test_suite[i]; struct test t = test_suite[i];
struct rgx_nfa * nfa; void *nfa;
printf("\n\t%s\n", t.regex); printf("\n\t%s\n", t.regex);
nfa = rgx_compile(NULL, t.regex, 1); nfa = rgx_compile(NULL, t.regex, 1);
for (struct match *m = t.matches; m->s; ++m)
if (!nfa ){
printf("Malformed\n");
continue;
}
for( struct match * m = t.matches; m->s; ++m)
{ {
int res; int res;
res = rgx_run(nfa, m->s); res = rgx_run(nfa, m->s);
printf("%s: %d %s\n", res == m->expect?"PASS":"FAIL", res, m->s); printf("%s: %d %s\n",
res == m->expect ? "PASS" : "FAIL", res, m->s);
/*
if (res != m->expect)
return 0;
*/
} }
} }
} }