Codebase list libmawk / 70499808-c2b9-4be2-ae11-0ab4b3a1a115/main src / libmawk / rexp / rexp1.c
70499808-c2b9-4be2-ae11-0ab4b3a1a115/main

Tree @70499808-c2b9-4be2-ae11-0ab4b3a1a115/main (Download .tar.gz)

rexp1.c @70499808-c2b9-4be2-ae11-0ab4b3a1a115/mainraw · history · blame

/********************************************
rexp1.c

libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
based on mawk code coming with the below copyright:

copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the AWK programming language.

Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/

/*  re machine	operations  */

#include  "rexp.h"
#include  "libmawk/zmalloc.h"

/*
 * Initialize *mp as a two state machine: the first state gets the
 * caller supplied type, the second one is M_ACCEPT.  Only the type
 * fields of the two states are written here.
 *
 * Returns 0 on success.  If the allocation fails, mp->start is NULL,
 * MAWK->REerrno is set to MEMORY_FAILURE and -MEMORY_FAILURE is returned.
 */
static int new_TWO(mawk_state_t *MAWK, int type, MACHINE *mp)
{
	const unsigned bytes = 2 * STATESZ;
	mawk_RESTATE *states;

	states = (mawk_RESTATE *) mawk_RE_malloc(MAWK, bytes);
	mp->start = states;
	if (states == NULL) {
		MAWK->REerrno = MEMORY_FAILURE;
		return -MEMORY_FAILURE;
	}

	mp->size = bytes;
	mp->stop = &states[1];
	states[0].type = type;
	states[1].type = M_ACCEPT;
	return 0;
}

/* Invalid machine returned by the constructors below when new_TWO() fails:
   the first two members are NULL, any remaining member is implicitly zero. */
static const MACHINE INVM = {NULL, NULL};

/*
 * Build a two state machine whose first state is M_ANY.
 * Returns INVM (start == NULL) if the machine can not be allocated.
 */
MACHINE mawk_RE_any(mawk_state_t *MAWK)
{
	MACHINE mach;

	if (new_TWO(MAWK, M_ANY, &mach) >= 0)
		return mach;
	return INVM;
}

/*
 * Build a two state machine that recognizes the start of string
 * (first state is M_START).
 * Returns INVM (start == NULL) if the machine can not be allocated.
 */
MACHINE mawk_RE_start(mawk_state_t *MAWK)
{
	MACHINE mach;

	if (new_TWO(MAWK, M_START, &mach) >= 0)
		return mach;
	return INVM;
}

/*
 * Build a two state machine whose first state is M_END (judging by the
 * name, the end-of-string counterpart of mawk_RE_start).
 * Returns INVM (start == NULL) if the machine can not be allocated.
 */
MACHINE mawk_RE_end(mawk_state_t *MAWK)
{
	MACHINE mach;

	if (new_TWO(MAWK, M_END, &mach) >= 0)
		return mach;
	return INVM;
}

/*
 * Build a two state machine that recognizes a class: the first state is
 * M_CLASS and refers to bvp.  The pointer is stored as is, nothing is
 * copied.
 * Returns INVM (start == NULL) if the machine can not be allocated.
 */
MACHINE mawk_RE_class(mawk_state_t *MAWK, mawk_BV *bvp)
{
	MACHINE mach;

	if (new_TWO(MAWK, M_CLASS, &mach) >= 0) {
		mach.start->data.bvp = bvp;
		return mach;
	}
	return INVM;
}

/*
 * Build a two state machine whose first state is M_U (what M_U matches
 * is decided by the matcher and is not visible in this file).
 * Returns INVM (start == NULL) if the machine can not be allocated.
 */
MACHINE mawk_RE_u(mawk_state_t *MAWK)
{
	MACHINE mach;

	if (new_TWO(MAWK, M_U, &mach) >= 0)
		return mach;
	return INVM;
}

/*
 * Build a two state machine whose first state is M_STR, carrying the
 * caller's string pointer and length.  The string itself is not copied.
 * Returns INVM (start == NULL) if the machine can not be allocated.
 */
MACHINE mawk_RE_str(mawk_state_t *MAWK, char *str, unsigned len)
{
	MACHINE mach;

	if (new_TWO(MAWK, M_STR, &mach) >= 0) {
		mawk_RESTATE *head = mach.start;

		head->data.str = str;
		head->len = len;
		return mach;
	}
	return INVM;
}


/*
 * Replace m by a machine that recognizes mn; n's storage is released.
 *
 * m's state array is grown and all states of n are copied over m's
 * stop state, so the result ends with n's stop state.
 *
 * Returns 0 on success.  If growing m fails, -MEMORY_FAILURE is returned
 * and neither m nor n is modified or freed: the result of the realloc is
 * kept in a temporary so m's only pointer to its storage is not lost.
 * NOTE(review): this assumes mawk_RE_realloc(), like realloc(), leaves the
 * old block valid when it returns NULL - confirm against mawk_zrealloc().
 */
int mawk_RE_cat(mawk_state_t *MAWK, MACHINE *mp, MACHINE *np)
{
	mawk_RESTATE *grown;
	unsigned sz1, sz2, sz;

	sz1 = mp->stop - mp->start;       /* states of m, stop state excluded */
	sz2 = np->stop - np->start + 1;   /* states of n, stop state included */
	sz = sz1 + sz2;

	grown = (mawk_RESTATE *) mawk_RE_realloc(MAWK, mp->start, mp->size, sz * STATESZ);
	if (grown == NULL)
		return -MEMORY_FAILURE;

	mp->start = grown;
	mp->size = sz * STATESZ;
	mp->stop = grown + (sz - 1);
	memcpy(grown + sz1, np->start, sz2 * STATESZ);
	mawk_RE_free(MAWK, np->start, np->size);
	return 0;
}

 /*  replace m by a machine that recognizes m|n  */

/*
 * Layout of the resulting state array (szm/szn = number of states in
 * m/n, stop states included):
 *   [0]                  M_2JA, jump = szm + 1
 *   [1 .. szm]           copy of m; its last slot is turned into M_1J
 *                        with jump = szn
 *   [szm+1 .. szm+szn]   copy of n; its last slot is the new stop state
 *
 * The storage of both input machines is freed.  Returns 0 on success;
 * if the new array can not be allocated, -MEMORY_FAILURE is returned
 * and neither machine is touched.
 */
int mawk_RE_or(mawk_state_t *MAWK, MACHINE *mp, MACHINE *np)
{
	mawk_RESTATE *merged;
	mawk_RESTATE *bridge;
	unsigned m_cnt, n_cnt, total;

	m_cnt = mp->stop - mp->start + 1;
	n_cnt = np->stop - np->start + 1;
	total = m_cnt + n_cnt + 1;

	merged = (mawk_RESTATE *) mawk_RE_malloc(MAWK, total * STATESZ);
	if (merged == NULL)
		return -MEMORY_FAILURE;

	/* m goes right behind the leading split state */
	memcpy(&merged[1], mp->start, m_cnt * STATESZ);
	mawk_RE_free(MAWK, mp->start, mp->size);

	mp->start = merged;
	mp->size = total * STATESZ;
	mp->stop = &merged[m_cnt + n_cnt];
	mp->stop->type = M_ACCEPT;

	merged[0].type = M_2JA;
	merged[0].data.jump = m_cnt + 1;

	/* n follows m; this copy also rewrites the stop slot set up above */
	memcpy(&merged[m_cnt + 1], np->start, n_cnt * STATESZ);
	mawk_RE_free(MAWK, np->start, np->size);

	/* the slot that held m's stop state now jumps over n */
	bridge = &merged[m_cnt];
	bridge->type = M_1J;
	bridge->data.jump = n_cnt;
	return 0;
}

/*  UNARY  OPERATIONS	  */

/*
 * Replace m by m* (closure).
 *
 * Layout of the new state array (cnt = number of states in m, stop
 * state included):
 *   [0]          M_2JA, jump = cnt + 1
 *   [1 .. cnt]   copy of m; the last slot becomes M_2JB with
 *                jump = -(cnt - 1)
 *   [cnt + 1]    M_ACCEPT, the new stop state
 *
 * The old storage of m is freed.  Returns 0 on success; if the new
 * array can not be allocated, -MEMORY_FAILURE is returned and m is
 * left untouched.
 */
int mawk_RE_close(mawk_state_t *MAWK, MACHINE *mp)
{
	mawk_RESTATE *buf;
	mawk_RESTATE *loop;
	unsigned cnt;

	cnt = mp->stop - mp->start + 1;

	buf = (mawk_RESTATE *) mawk_RE_malloc(MAWK, (cnt + 2) * STATESZ);
	if (buf == NULL)
		return -MEMORY_FAILURE;

	memcpy(&buf[1], mp->start, cnt * STATESZ);
	mawk_RE_free(MAWK, mp->start, mp->size);

	mp->start = buf;
	mp->size = (cnt + 2) * STATESZ;
	mp->stop = &buf[cnt + 1];

	buf[0].type = M_2JA;
	buf[0].data.jump = cnt + 1;

	/* negation is done on the unsigned count and then stored in the
	   jump field (presumably a signed member - not visible here) */
	loop = &buf[cnt];
	loop->type = M_2JB;
	loop->data.jump = -(cnt - 1);

	loop[1].type = M_ACCEPT;
	return 0;
}

/*
 * Replace m by m+ (positive closure).
 *
 * m's state array is grown by one state; the slot that held m's stop
 * state becomes M_2JB with jump = -(number of states before it) and a
 * new M_ACCEPT stop state is placed behind it.
 *
 * Returns 0 on success.  If growing m fails, -MEMORY_FAILURE is returned
 * and m is left exactly as it was: start, size and stop are only updated
 * after the realloc result has been checked, so the machine is never left
 * with a NULL start next to an already enlarged size.
 * NOTE(review): this assumes mawk_RE_realloc(), like realloc(), leaves the
 * old block valid when it returns NULL - confirm against mawk_zrealloc().
 */
int mawk_RE_poscl(mawk_state_t *MAWK, MACHINE *mp)
{
	register mawk_RESTATE *p;
	unsigned sz;

	sz = mp->stop - mp->start + 1;
	p = (mawk_RESTATE *) mawk_RE_realloc(MAWK, mp->start, mp->size, (sz + 1) * STATESZ);
	if (p == NULL)
		return -MEMORY_FAILURE;

	mp->start = p;
	mp->size = (sz + 1) * STATESZ;
	mp->stop = p + sz;

	/* step to the slot of m's old stop state; sz now counts the states before it */
	p += --sz;
	p->type = M_2JB;
	p->data.jump = -sz;
	(p + 1)->type = M_ACCEPT;
	return 0;
}

/*
 * Replace m by m? (zero or one).
 *
 * A new array one state larger than m is allocated: slot 0 is M_2JB
 * with jump = number of states in m, slots 1.. hold the copy of m, and
 * the copy of m's stop state is the stop state of the result.
 *
 * The old storage of m is freed.  Returns 0 on success; if the new
 * array can not be allocated, -MEMORY_FAILURE is returned and m is
 * left untouched.
 */
int mawk_RE_01(mawk_state_t *MAWK, MACHINE *mp)
{
	mawk_RESTATE *buf;
	unsigned cnt;

	cnt = mp->stop - mp->start + 1;

	buf = (mawk_RESTATE *) mawk_RE_malloc(MAWK, (cnt + 1) * STATESZ);
	if (buf == NULL)
		return -MEMORY_FAILURE;

	memcpy(&buf[1], mp->start, cnt * STATESZ);
	mawk_RE_free(MAWK, mp->start, mp->size);

	buf[0].type = M_2JB;
	buf[0].data.jump = cnt;

	mp->start = buf;
	mp->size = (cnt + 1) * STATESZ;
	mp->stop = &buf[cnt];
	return 0;
}

/*===================================
MEMORY	ALLOCATION
 *==============================*/


/*
 * Allocate sz bytes for the regexp code through mawk_zmalloc() and
 * return its result unchanged (callers in this file treat NULL as
 * out of memory).  With MAWK_RE_MDEBUG defined the allocation is
 * traced on stderr.
 */
PTR mawk_RE_malloc(mawk_state_t *MAWK, unsigned sz)
{
	PTR p;
	p = mawk_zmalloc(MAWK, sz);
#ifdef MAWK_RE_MDEBUG
	/* %p requires a void * and sz is unsigned, so use a cast and %u */
	fprintf(stderr, "RE malloc: -> [%p] %u\n", (void *) p, sz);
#endif
	return p;
}

/*
 * Resize the block p from oldsz to sz bytes through mawk_zrealloc()
 * and return its result unchanged (callers in this file treat NULL as
 * out of memory).  With MAWK_RE_MDEBUG defined the call is traced on
 * stderr.
 */
PTR mawk_RE_realloc(mawk_state_t *MAWK, register PTR p, unsigned oldsz, unsigned sz)
{
	PTR n;
	n = mawk_zrealloc(MAWK, p, oldsz, sz);
#ifdef MAWK_RE_MDEBUG
	/* %p requires a void * and the sizes are unsigned, so use casts and %u */
	fprintf(stderr, "RE realloc: [%p] %u -> [%p] %u\n", (void *) p, oldsz, (void *) n, sz);
#endif
	return n;
}

/*
 * Release the sz byte block p through mawk_zfree().  With
 * MAWK_RE_MDEBUG defined the call is traced on stderr before the
 * block is released.
 */
void mawk_RE_free(mawk_state_t *MAWK, PTR p, unsigned sz)
{
#ifdef MAWK_RE_MDEBUG
	/* %p requires a void * and sz is unsigned, so use a cast and %u */
	fprintf(stderr, "RE free: [%p] %u\n", (void *) p, sz);
#endif
	mawk_zfree(MAWK, p, sz);
}