Codebase list libmawk / upstream/latest src / libmawk / re_cmpl.c
upstream/latest

Tree @upstream/latest (Download .tar.gz)

re_cmpl.c @upstream/latestraw · history · blame

/********************************************
re_cmpl.c

libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
based on mawk code coming with the below copyright:

copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the AWK programming language.

Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/

#include <string.h>
#include "mawk.h"
#include "memory.h"
#include "scan.h"
#include "regexp.h"
#include "repl.h"


/* forward declaration: builds the replacement cell for a string; the
   definition follows further down, after its use in this file is known */
static mawk_cell_t *REPL_compile(mawk_state_t *, mawk_string_t *);

/* error format used when a regular expression fails to compile: the first
   %s receives the mawk_REerrlist[] entry, the second the pattern text */
static const char efmt[] = "regular expression compile failed (%s)\n%s";

/* Compile a mawk_string_t to a regular expression machine.

   MAWK->re_list holds every pattern compiled so far.  It is searched
   first (strcmp on the pattern text); a hit is moved to the front of the
   list and its machine is returned.  On a miss a new node is created,
   which takes a reference on sval, and the text is compiled with
   mawk_REcompile().

   Returns the compiled machine.  When compilation fails outside of the
   EXECUTION state the error is reported through mawk_compile_error(),
   the half-built node and the reference on sval are released again, and
   (PTR) 0 is returned.
*/
PTR mawk_re_compile(mawk_state_t *MAWK, mawk_string_t *sval)
{
	register RE_NODE *p;
	RE_NODE *q;
	char *s;

	/* search list; q trails one node behind p */
	s = sval->str;
	p = MAWK->re_list;
	q = (RE_NODE *) 0;
	while (p) {
		if (strcmp(s, p->sval->str) == 0) {	/* found */
			if (!q)										/* already at front */
				goto _return;

			/* delete from list for move to front */
			q->link = p->link;
			goto found;
		}
		q = p;
		p = p->link;
	}

	/* not found: build a new node that keeps a reference on sval */
	p = MAWK_ZMALLOC(MAWK, RE_NODE);
	p->sval = sval;
	sval->ref_cnt++;

	if (!(p->re = mawk_REcompile(MAWK, s))) {
		if (MAWK->mawk_state == EXECUTION) {
			/* NOTE(review): if mawk_rt_error() can return, a node with a null
			   machine is put on the list below, exactly as before - confirm */
			mawk_rt_error(MAWK, efmt, mawk_REerrlist[MAWK->REerrno], s);
		}
		else {											/* compiling */
			mawk_compile_error(MAWK, efmt, mawk_REerrlist[MAWK->REerrno], s);

			/* the node never reaches the list, so hand back the string
			   reference and the node itself instead of leaking them */
			sval->ref_cnt--;
			mawk_zfree(MAWK, p, sizeof(RE_NODE));
			return (PTR) 0;
		}
	}


found:
/* insert p at the front of the list */
	p->link = MAWK->re_list;
	MAWK->re_list = p;

_return:

#if 0
#ifdef	DEBUG
	if (MAWK->dump_RE)
		mawk_REmprint(p->re, stderr);
#endif
#endif
	return p->re;
}



/* Reverse lookup: walk MAWK->re_list for the node whose compiled machine
   is m and return the pattern text it was built from.  Returns NULL when
   no node matches (DEBUG builds call mawk_bozo() first).
   This is only used by mawk_da(). */
char *mawk_re_uncompile(mawk_state_t *MAWK, PTR m)
{
	RE_NODE *node = MAWK->re_list;

	while (node) {
		if (node->re == m)
			return node->sval->str;
		node = node->link;
	}
#ifdef	DEBUG
	mawk_bozo(MAWK, "non compiled machine");
#endif
	return NULL;
}



/*=================================================*/
/*  replacement	 operations   */

/* Create a replacement mawk_cell_t from a mawk_string_t *.

   The text is cut at every unescaped '&'.  "\&" and "\\" are copied as a
   plain '&' or '\'; a backslash in front of anything else is copied
   unchanged.  Every non-empty literal run becomes a new string piece and
   every '&' becomes a null piece; the pieces are collected in split_buff.

   Result:
     C_REPL   exactly one piece and it is a string: ptr is that string
     C_REPLV  otherwise: ptr is a fresh array of d.vcnt pieces in which a
              null entry stands for an '&'

   A piece is stored only while fewer than MAX_SPLIT pieces exist, so
   split_buff is never written past that many entries (assumes split_buff
   holds at least MAX_SPLIT entries, as the overflow report implies).  When
   a piece does not fit, scanning stops and mawk_overflow() is called.
   NOTE(review): should mawk_overflow() return, the cell is built from the
   pieces collected up to that point - confirm this is acceptable.
*/
static mawk_cell_t *REPL_compile(mawk_state_t *MAWK, mawk_string_t *sval)
{
	int i = 0;
	int overflow = 0;							/* set when a piece did not fit */
	register char *p = sval->str;
	register char *q;
	char *xbuff;
	mawk_cell_t *cp;

	/* scratch buffer for the current literal run; it can never grow
	   longer than the input text */
	q = xbuff = (char *) mawk_zmalloc(MAWK, sval->len + 1);

	while (1) {
		switch (*p) {
		case 0:
			*q = 0;
			goto done;

		case '\\':
			if (p[1] == '&' || p[1] == '\\') {
				*q++ = p[1];
				p += 2;
				continue;
			}
			else
				break;

		case '&':
			/* if empty we don't need to make a node */
			if (q != xbuff) {
				if (i >= MAX_SPLIT) {
					overflow = 1;
					goto done;
				}
				*q = 0;
				split_buff[i++] = mawk_new_STRING(MAWK, xbuff);
			}
			/* and a null node for the '&'  */
			if (i >= MAX_SPLIT) {
				overflow = 1;
				goto done;
			}
			split_buff[i++] = (mawk_string_t *) 0;
			/*  reset  */
			p++;
			q = xbuff;
			continue;

		default:
			break;
		}

		*q++ = *p++;
	}

done:
	/* if we have one empty string it will get made now */
	if (!overflow && (q > xbuff || i == 0)) {
		if (i >= MAX_SPLIT)
			overflow = 1;
		else
			split_buff[i++] = mawk_new_STRING(MAWK, xbuff);
	}

	/* more pieces than split_buff can hold */
	if (overflow)
		mawk_overflow(MAWK, "replacement pieces", MAX_SPLIT);

	cp = MAWK_ZMALLOC(MAWK, mawk_cell_t);
	if (i == 1 && split_buff[0]) {
		cp->type = C_REPL;
		cp->ptr = (PTR) split_buff[0];
	}
	else {
		mawk_string_t **sp = (mawk_string_t **)
			(cp->ptr = mawk_zmalloc(MAWK, sizeof(mawk_string_t *) * i));
		int j = 0;

		while (j < i)
			*sp++ = split_buff[j++];

		cp->type = C_REPLV;
		cp->d.vcnt = i;
	}
	mawk_zfree(MAWK, xbuff, sval->len + 1);
	return cp;
}

/* Release what a replacement mawk_cell_t holds.

   A C_REPL cell drops its single string.  Any other cell is handled as a
   C_REPLV: every non-null piece is dropped and the piece array is then
   handed back with mawk_zfree().  The cell itself is not freed here. */
void mawk_repl_destroy(mawk_state_t *MAWK, register mawk_cell_t *cp)
{
	mawk_string_t **pieces;
	unsigned idx, total;

	if (cp->type == C_REPL) {
		free_STRING(string(cp));
		return;
	}

	/* a C_REPLV */
	pieces = (mawk_string_t **) cp->ptr;
	total = cp->d.vcnt;
	for (idx = 0; idx < total; idx++) {
		/* null entries stand for '&' and own nothing */
		if (pieces[idx]) {
			free_STRING(pieces[idx]);
		}
	}
	mawk_zfree(MAWK, cp->ptr, cp->d.vcnt * sizeof(mawk_string_t *));
}

/* Copy a C_REPLV cell into target.

   target gets its own piece array of the same length; the pieces
   themselves are shared, so every non-null string gains one reference.
   Returns target. */
mawk_cell_t *mawk_replv_cpy(mawk_state_t *MAWK, mawk_cell_t *target, const mawk_cell_t *source)
{
	mawk_string_t **dst, **src;
	unsigned total, idx;

	target->type = C_REPLV;
	target->d.vcnt = source->d.vcnt;
	total = target->d.vcnt;
	target->ptr = (PTR) mawk_zmalloc(MAWK, total * sizeof(mawk_string_t *));

	dst = (mawk_string_t **) target->ptr;
	src = (mawk_string_t **) source->ptr;
	for (idx = 0; idx < total; idx++) {
		mawk_string_t *piece = src[idx];

		/* a null piece marks an '&' and carries no reference count */
		if (piece)
			piece->ref_cnt++;
		dst[idx] = piece;
	}
	return target;
}


/* Look up (or build) the compiled form of a replacement string.

   MAWK->repl_list is searched by text.  A hit that is not already the
   head is unlinked and re-inserted at the front.  On a miss a new node is
   created, which takes a reference on sval, and the text is compiled with
   REPL_compile().
   Returns a ptr to the node's mawk_cell_t (C_REPL or C_REPLV).
*/
mawk_cell_t *mawk_repl_compile(mawk_state_t *MAWK, mawk_string_t *sval)
{
	REPL_NODE *node;
	REPL_NODE *prev = (REPL_NODE *) 0;
	const char *text = sval->str;

	for (node = MAWK->repl_list; node; prev = node, node = node->link) {
		if (strcmp(text, node->sval->str) != 0)
			continue;

		if (!prev)									/* already at front */
			return node->cp;

		prev->link = node->link;		/* unlink, re-inserted below */
		break;
	}

	if (!node) {
		/* not on the list yet */
		node = MAWK_ZMALLOC(MAWK, REPL_NODE);
		node->sval = sval;
		sval->ref_cnt++;
		node->cp = REPL_compile(MAWK, sval);
	}

	/* put the node at the front of the list */
	node->link = MAWK->repl_list;
	MAWK->repl_list = node;
	return node->cp;
}

/* Return the source string for a mawk_cell_t of type C_REPL or C_REPLV;
   this is only used by mawk_da().

   MAWK->repl_list is searched for a node whose compiled cell matches cp:
     C_REPL   the node's cell is a C_REPL with the same string pointer
     C_REPLV  the node's cell is a C_REPLV with the same number of pieces
              and an identical array of piece pointers
   The piece count is compared before the arrays, so memcmp() never reads
   past the end of a shorter cached array and a cell cannot match a longer
   one that merely starts with the same pieces.

   Returns NULL when nothing matches.
   NOTE(review): the diagnostic below is guarded by "#if DEBUG" while the
   rest of this file uses "#ifdef DEBUG"; left as it was. */


char *mawk_repl_uncompile(mawk_state_t *MAWK, mawk_cell_t *cp)
{
	register REPL_NODE *p = MAWK->repl_list;

	if (cp->type == C_REPL) {
		while (p) {
			if (p->cp->type == C_REPL && p->cp->ptr == cp->ptr)
				return p->sval->str;
			else
				p = p->link;
		}
	}
	else {
		while (p) {
			if (p->cp->type == C_REPLV
					&& p->cp->d.vcnt == cp->d.vcnt
					&& memcmp(cp->ptr, p->cp->ptr, cp->d.vcnt * sizeof(mawk_string_t *))
					== 0)
				return p->sval->str;
			else
				p = p->link;
		}
	}

#if  DEBUG
	mawk_bozo(MAWK, "unable to uncompile an repl");
#endif
	return NULL;
}

/*
  Convert a C_REPLV cell in place to a C_REPL cell, substituting sval for
  every '&' (null) piece.

  The pieces are concatenated into one new string, each piece is released
  afterwards and the old piece array is handed back with mawk_zfree().
  Returns cp.
*/

mawk_cell_t *mawk_replv_to_repl(mawk_state_t *MAWK, mawk_cell_t *cp, mawk_string_t *sval)
{
	mawk_string_t **pieces = (mawk_string_t **) cp->ptr;
	unsigned total = cp->d.vcnt;
	unsigned idx;
	unsigned len = 0;
	char *out;

#ifdef	DEBUG
	if (cp->type != C_REPLV)
		mawk_bozo(MAWK, "not replv");
#endif

	/* first pass: fill every '&' slot with sval (one reference per slot)
	   and add up the length of the result */
	for (idx = 0; idx < total; idx++) {
		if (!pieces[idx]) {
			pieces[idx] = sval;
			sval->ref_cnt++;
		}
		len += pieces[idx]->len;
	}

	cp->type = C_REPL;
	cp->ptr = (PTR) mawk_new_STRING0(MAWK, len);

	/* second pass: append each piece to the new string, then drop it */
	out = string(cp)->str;
	for (idx = 0; idx < total; idx++) {
		mawk_string_t *piece = pieces[idx];

		memcpy(out, piece->str, piece->len);
		out += piece->len;
		{
			free_STRING(piece);
		}
	}

	mawk_zfree(MAWK, pieces, total * sizeof(mawk_string_t *));
	return cp;
}