/********************************************
rexp1.c
libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
based on mawk code coming with the below copyright:
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the AWK programming language.
Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
/* re machine operations */
#include "rexp.h"
#include "libmawk/zmalloc.h"
/* Initialize *mp as a two state machine.
 *
 * Allocates room for two states through mawk_RE_malloc(). The first state
 * gets the caller supplied type, the second one is the M_ACCEPT state and
 * becomes mp->stop.
 *
 * Returns 0 on success. If the allocation fails, mp->start is left NULL,
 * MAWK->REerrno is set to MEMORY_FAILURE and -MEMORY_FAILURE is returned.
 */
static int new_TWO(mawk_state_t *MAWK, int type, MACHINE *mp)
{
	const unsigned nbytes = 2 * STATESZ;
	mawk_RESTATE *states = (mawk_RESTATE *) mawk_RE_malloc(MAWK, nbytes);

	mp->start = states;
	if (states == NULL) {
		MAWK->REerrno = MEMORY_FAILURE;
		return -MEMORY_FAILURE;
	}

	states[0].type = type;
	states[1].type = M_ACCEPT;
	mp->stop = &states[1];
	mp->size = nbytes;
	return 0;
}
/* Invalid machine: the value the constructors in this file return when
 * new_TWO() reports a failure. */
static const MACHINE INVM = {NULL, NULL};
/* Build a two state machine whose start state has type M_ANY.
 * Returns INVM if new_TWO() fails. */
MACHINE mawk_RE_any(mawk_state_t *MAWK)
{
	MACHINE any_m;

	if (new_TWO(MAWK, M_ANY, &any_m) >= 0)
		return any_m;
	return INVM;
}
/* Build a two state machine that recognizes the start of string
 * (start state type M_START). Returns INVM if new_TWO() fails. */
MACHINE mawk_RE_start(mawk_state_t *MAWK)
{
	MACHINE start_m;

	if (new_TWO(MAWK, M_START, &start_m) >= 0)
		return start_m;
	return INVM;
}
/* Build a two state machine whose start state has type M_END.
 * Returns INVM if new_TWO() fails. */
MACHINE mawk_RE_end(mawk_state_t *MAWK)
{
	MACHINE end_m;

	if (new_TWO(MAWK, M_END, &end_m) >= 0)
		return end_m;
	return INVM;
}
/* Build a two state machine that recognizes a class (start state type
 * M_CLASS). The bvp pointer itself is stored in the start state; nothing
 * is copied here. Returns INVM if new_TWO() fails. */
MACHINE mawk_RE_class(mawk_state_t *MAWK, mawk_BV *bvp)
{
	MACHINE class_m;

	if (new_TWO(MAWK, M_CLASS, &class_m) >= 0) {
		class_m.start->data.bvp = bvp;
		return class_m;
	}
	return INVM;
}
/* Build a two state machine whose start state has type M_U.
 * Returns INVM if new_TWO() fails. */
MACHINE mawk_RE_u(mawk_state_t *MAWK)
{
	MACHINE u_m;

	if (new_TWO(MAWK, M_U, &u_m) >= 0)
		return u_m;
	return INVM;
}
/* Build a two state machine whose start state has type M_STR.
 * The start state records len and the str pointer as given; the string
 * is not duplicated here. Returns INVM if new_TWO() fails. */
MACHINE mawk_RE_str(mawk_state_t *MAWK, char *str, unsigned len)
{
	MACHINE str_m;

	if (new_TWO(MAWK, M_STR, &str_m) >= 0) {
		str_m.start->len = len;
		str_m.start->data.str = str;
		return str_m;
	}
	return INVM;
}
/* replace m and n by a machine that recognizes mn
 *
 * The state array of m is grown so that all states of n fit behind it;
 * n's states are copied starting at the slot that used to hold m's stop
 * state, so that state is overwritten. On success n's state array is
 * released and *mp describes the combined machine.
 *
 * Returns 0 on success, -MEMORY_FAILURE if the array can not be grown.
 * On failure *mp and *np are left exactly as they were, so the caller
 * still owns (and can release) both machines.
 * NOTE(review): this assumes mawk_RE_realloc(), like realloc(), leaves the
 * old block valid when it returns NULL - confirm against mawk_zrealloc().
 */
int mawk_RE_cat(mawk_state_t *MAWK, MACHINE *mp, MACHINE *np)
{
	unsigned sz1, sz2, sz;
	mawk_RESTATE *grown;

	sz1 = mp->stop - mp->start;       /* states of m, stop state not counted */
	sz2 = np->stop - np->start + 1;   /* all states of n */
	sz = sz1 + sz2;

	/* keep the result in a temporary: storing it straight into mp->start
	   would lose the only pointer to the old array on failure */
	grown = (mawk_RESTATE *) mawk_RE_realloc(MAWK, mp->start, mp->size, sz * STATESZ);
	if (grown == NULL)
		return -MEMORY_FAILURE;

	mp->start = grown;
	mp->size = sz * STATESZ;
	mp->stop = grown + (sz - 1);
	memcpy(grown + sz1, np->start, sz2 * STATESZ);
	mawk_RE_free(MAWK, np->start, np->size);
	return 0;
}
/* replace m by a machine that recognizes m|n
 *
 * Layout of the new state array (m_len/n_len are the state counts of m
 * and n, stop states included):
 *   [0]                      M_2JA, jump = m_len + 1 (index where n starts)
 *   [1 .. m_len]             copy of m; its last state is retyped to M_1J
 *                            with jump = n_len (distance to the final state)
 *   [m_len+1 .. m_len+n_len] copy of n; its last state is the new stop
 *
 * Both old state arrays are released on success. Returns 0 on success or
 * -MEMORY_FAILURE if the new array can not be allocated, in which case
 * neither machine has been modified.
 */
int mawk_RE_or(mawk_state_t *MAWK, MACHINE *mp, MACHINE *np)
{
	unsigned m_len = mp->stop - mp->start + 1;
	unsigned n_len = np->stop - np->start + 1;
	unsigned total = m_len + n_len + 1;
	mawk_RESTATE *merged, *m_tail;

	merged = (mawk_RESTATE *) mawk_RE_malloc(MAWK, total * STATESZ);
	if (merged == NULL)
		return -MEMORY_FAILURE;

	/* m goes right behind the leading branch state */
	memcpy(merged + 1, mp->start, m_len * STATESZ);
	mawk_RE_free(MAWK, mp->start, mp->size);

	mp->start = merged;
	mp->size = total * STATESZ;
	mp->stop = merged + m_len + n_len;
	mp->stop->type = M_ACCEPT;

	merged->type = M_2JA;
	merged->data.jump = m_len + 1;

	/* n fills the tail of the array, up to and including the stop slot */
	memcpy(merged + m_len + 1, np->start, n_len * STATESZ);
	mawk_RE_free(MAWK, np->start, np->size);

	/* what used to be m's stop state now jumps over n */
	m_tail = merged + m_len;
	m_tail->type = M_1J;
	m_tail->data.jump = n_len;
	return 0;
}
/* UNARY OPERATIONS */
/* replace m by m*
 *
 * A new array with two extra states is allocated and m is copied to
 * index 1. Resulting layout (body = number of states of m, stop included):
 *   [0]        M_2JA, jump = body + 1 (index of the new accept state)
 *   [1..body]  copy of m; the last one (m's old stop) is retyped to
 *              M_2JB with jump = -(body - 1), i.e. back to index 1
 *   [body+1]   M_ACCEPT, the new stop state
 *
 * The old array is released on success. Returns 0 on success or
 * -MEMORY_FAILURE if the allocation fails (m is untouched then).
 */
int mawk_RE_close(mawk_state_t *MAWK, MACHINE *mp)
{
	unsigned body = mp->stop - mp->start + 1;
	mawk_RESTATE *base, *loop;

	base = (mawk_RESTATE *) mawk_RE_malloc(MAWK, (body + 2) * STATESZ);
	if (base == NULL)
		return -MEMORY_FAILURE;

	memcpy(base + 1, mp->start, body * STATESZ);
	mawk_RE_free(MAWK, mp->start, mp->size);

	mp->start = base;
	mp->size = (body + 2) * STATESZ;
	mp->stop = base + (body + 1);

	base->type = M_2JA;
	base->data.jump = body + 1;

	loop = base + body;
	loop->type = M_2JB;
	loop->data.jump = -(body - 1);

	(loop + 1)->type = M_ACCEPT;
	return 0;
}
/* replace m by m+ (positive closure)
 *
 * The state array is grown by one state. The slot that held m's stop
 * state is retyped to M_2JB with a jump back to index 0, and the newly
 * added last slot becomes the M_ACCEPT stop state.
 *
 * Returns 0 on success, -MEMORY_FAILURE if the array can not be grown.
 * On failure *mp is left exactly as it was (start, stop and size are only
 * updated once the new array is known to exist).
 * NOTE(review): this assumes mawk_RE_realloc(), like realloc(), leaves the
 * old block valid when it returns NULL - confirm against mawk_zrealloc().
 */
int mawk_RE_poscl(mawk_state_t *MAWK, MACHINE *mp)
{
	register mawk_RESTATE *p;
	unsigned sz;

	sz = mp->stop - mp->start + 1;

	/* do not touch *mp before the result is checked: overwriting
	   mp->start/mp->size first would corrupt the machine on failure */
	p = (mawk_RESTATE *) mawk_RE_realloc(MAWK, mp->start, mp->size, (sz + 1) * STATESZ);
	if (p == NULL)
		return -MEMORY_FAILURE;

	mp->start = p;
	mp->size = (sz + 1) * STATESZ;
	mp->stop = p + sz;

	p += --sz;              /* p now points at the old stop slot */
	p->type = M_2JB;
	p->data.jump = -sz;     /* back to the first state */
	(p + 1)->type = M_ACCEPT;
	return 0;
}
/* replace m by m? (zero or one)
 *
 * A new array with one extra state is allocated; m is copied to index 1
 * and a leading M_2JB state is put in front of it whose jump equals the
 * number of states of m, i.e. the index of the (copied) stop state.
 *
 * The old array is released on success. Returns 0 on success or
 * -MEMORY_FAILURE if the allocation fails (m is untouched then).
 */
int mawk_RE_01(mawk_state_t *MAWK, MACHINE *mp)
{
	unsigned body = mp->stop - mp->start + 1;
	mawk_RESTATE *head;

	head = (mawk_RESTATE *) mawk_RE_malloc(MAWK, (body + 1) * STATESZ);
	if (head == NULL)
		return -MEMORY_FAILURE;

	memcpy(head + 1, mp->start, body * STATESZ);
	mawk_RE_free(MAWK, mp->start, mp->size);

	head->type = M_2JB;
	head->data.jump = body;

	mp->start = head;
	mp->stop = head + body;
	mp->size = (body + 1) * STATESZ;
	return 0;
}
/*===================================
MEMORY ALLOCATION
*==============================*/
/* Allocate sz bytes for the regexp code by forwarding to mawk_zmalloc().
 * Returns whatever mawk_zmalloc() returns. When MAWK_RE_MDEBUG is defined
 * the call is traced on stderr. */
PTR mawk_RE_malloc(mawk_state_t *MAWK, unsigned sz)
{
	PTR p;

	p = mawk_zmalloc(MAWK, sz);
#ifdef MAWK_RE_MDEBUG
	/* %p requires a void pointer and sz is unsigned, hence cast and %u */
	fprintf(stderr, "RE malloc: -> [%p] %u\n", (void *) p, sz);
#endif
	return p;
}
/* Resize the block p from oldsz to sz bytes by forwarding to
 * mawk_zrealloc(). Returns whatever mawk_zrealloc() returns. When
 * MAWK_RE_MDEBUG is defined the call is traced on stderr. */
PTR mawk_RE_realloc(mawk_state_t *MAWK, register PTR p, unsigned oldsz, unsigned sz)
{
	PTR n;

	n = mawk_zrealloc(MAWK, p, oldsz, sz);
#ifdef MAWK_RE_MDEBUG
	/* %p requires a void pointer and the sizes are unsigned, hence casts and %u */
	fprintf(stderr, "RE realloc: [%p] %u -> [%p] %u\n", (void *) p, oldsz, (void *) n, sz);
#endif
	return n;
}
/* Release the sz byte block p by forwarding to mawk_zfree(). When
 * MAWK_RE_MDEBUG is defined the call is traced on stderr before the
 * block is released. */
void mawk_RE_free(mawk_state_t *MAWK, PTR p, unsigned sz)
{
#ifdef MAWK_RE_MDEBUG
	/* %p requires a void pointer and sz is unsigned, hence cast and %u */
	fprintf(stderr, "RE free: [%p] %u\n", (void *) p, sz);
#endif
	mawk_zfree(MAWK, p, sz);
}