/********************************************
re_cmpl.c
libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
based on mawk code coming with the below copyright:
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the AWK programming language.
Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
#include <string.h>
#include "mawk.h"
#include "memory.h"
#include "scan.h"
#include "regexp.h"
#include "repl.h"
/* forward declaration: REPL_compile() is defined later in this file and
   is called by mawk_repl_compile() */
static mawk_cell_t *REPL_compile(mawk_state_t *, mawk_string_t *);
/* message format used when mawk_REcompile() fails; the two %s arguments
   are the mawk_REerrlist[] text and the regular expression source */
static const char efmt[] = "regular expression compile failed (%s)\n%s";
/* Compile a mawk_string_t to a regular expression machine.

   MAWK->re_list is searched first for a node whose source string equals
   sval->str; a hit is moved to the front of the list and its machine is
   returned. On a miss a new RE_NODE is allocated, takes a reference on
   sval, and is inserted at the front of the list.

   Returns the compiled machine. If mawk_REcompile() fails while
   MAWK->mawk_state is not EXECUTION, mawk_compile_error() is called, the
   half-built node is released again and (PTR) 0 is returned.
*/
PTR mawk_re_compile(mawk_state_t *MAWK, mawk_string_t *sval)
{
	register RE_NODE *p;
	RE_NODE *q;
	char *s;

	/* search list */
	s = sval->str;
	p = MAWK->re_list;
	q = (RE_NODE *) 0;
	while (p) {
		if (strcmp(s, p->sval->str) == 0) {	/* found */
			if (!q)	/* already at front */
				goto _return;
			else {	/* delete from list for move to front */
				q->link = p->link;
				goto found;
			}
		}
		else {
			q = p;
			p = p->link;
		}
	}

	/* not found */
	p = MAWK_ZMALLOC(MAWK, RE_NODE);
	p->sval = sval;
	sval->ref_cnt++;
	if (!(p->re = mawk_REcompile(MAWK, s))) {
		if (MAWK->mawk_state == EXECUTION) {
			/* NOTE(review): if mawk_rt_error() returns, control falls
			   through and the node is cached with a NULL machine --
			   confirm this is intended */
			mawk_rt_error(MAWK, efmt, mawk_REerrlist[MAWK->REerrno], s);
		}
		else {	/* compiling */
			mawk_compile_error(MAWK, efmt, mawk_REerrlist[MAWK->REerrno], s);
			/* the node never reaches the list: drop the reference taken
			   above and give the node back, otherwise both leak */
			sval->ref_cnt--;
			mawk_zfree(MAWK, p, sizeof(*p));
			return (PTR) 0;
		}
	}

found:
	/* insert p at the front of the list */
	p->link = MAWK->re_list;
	MAWK->re_list = p;

_return:
#if 0
#ifdef DEBUG
	if (MAWK->dump_RE)
		mawk_REmprint(p->re, stderr);
#endif
#endif
	return p->re;
}
/* Look up the source string of a compiled machine by walking
   MAWK->re_list; this is only used by mawk_da().
   Returns the string of the first node whose machine is m, or NULL when
   no node matches (DEBUG builds call mawk_bozo() first). */
char *mawk_re_uncompile(mawk_state_t *MAWK, PTR m)
{
	RE_NODE *node = MAWK->re_list;

	while (node) {
		if (node->re == m)
			return node->sval->str;
		node = node->link;
	}
#ifdef DEBUG
	mawk_bozo(MAWK, "non compiled machine");
#endif
	return NULL;
}
/*=================================================*/
/* replacement operations */

/* Create a replacement mawk_cell_t from a mawk_string_t.

   The text of sval is split at every unescaped '&':
     - "\&" yields a literal '&' and "\\" yields a literal '\';
     - any other backslash is copied unchanged;
     - each unescaped '&' becomes a NULL entry in the piece list, the
       literal text between them becomes mawk_string_t pieces.

   If the result is a single non-NULL piece the cell is a C_REPL whose
   ptr is that string; otherwise it is a C_REPLV whose ptr is a vector of
   d.vcnt pieces.

   At most MAX_SPLIT pieces are collected in split_buff. The limit is
   checked before every store, so split_buff is never written past
   MAX_SPLIT entries (assumes split_buff holds MAX_SPLIT entries, as the
   limit check implies -- TODO confirm). When the limit is hit,
   mawk_overflow() is called; should it return, the cell is built from
   the pieces gathered so far.
*/
static mawk_cell_t *REPL_compile(mawk_state_t *MAWK, mawk_string_t *sval)
{
	int i = 0;	/* number of pieces stored in split_buff */
	register char *p = sval->str;	/* read position in the source text */
	register char *q;	/* write position in xbuff */
	char *xbuff;	/* scratch buffer for the current literal piece */
	mawk_cell_t *cp;

	/* no piece can be longer than the whole source text */
	q = xbuff = (char *) mawk_zmalloc(MAWK, sval->len + 1);

	while (1) {
		switch (*p) {
		case 0:
			*q = 0;
			goto done;

		case '\\':
			if (p[1] == '&' || p[1] == '\\') {
				*q++ = p[1];
				p += 2;
				continue;
			}
			else
				break;

		case '&':
			/* if empty we don't need to make a node */
			if (q != xbuff) {
				if (i >= MAX_SPLIT)
					goto too_many;
				*q = 0;
				split_buff[i++] = mawk_new_STRING(MAWK, xbuff);
			}
			/* and a null node for the '&' */
			if (i >= MAX_SPLIT)
				goto too_many;
			split_buff[i++] = (mawk_string_t *) 0;
			/* reset */
			p++;
			q = xbuff;
			continue;

		default:
			break;
		}

		*q++ = *p++;
	}

done:
	/* if we have one empty string it will get made now */
	if (q > xbuff || i == 0) {
		if (i >= MAX_SPLIT)
			goto too_many;
		split_buff[i++] = mawk_new_STRING(MAWK, xbuff);
	}
	goto build;

too_many:
	mawk_overflow(MAWK, "replacement pieces", MAX_SPLIT);
	/* only reached if mawk_overflow() returns: continue with the pieces
	   that fit */

build:
	cp = MAWK_ZMALLOC(MAWK, mawk_cell_t);
	if (i == 1 && split_buff[0]) {
		cp->type = C_REPL;
		cp->ptr = (PTR) split_buff[0];
	}
	else {
		mawk_string_t **sp = (mawk_string_t **)
			(cp->ptr = mawk_zmalloc(MAWK, sizeof(mawk_string_t *) * i));
		int j = 0;

		while (j < i)
			*sp++ = split_buff[j++];
		cp->type = C_REPLV;
		cp->d.vcnt = i;
	}
	mawk_zfree(MAWK, xbuff, sval->len + 1);
	return cp;
}
/* Release the memory referenced by a replacement mawk_cell_t.
   For a C_REPL the single string is released; for a C_REPLV every
   non-NULL piece is released and then the piece vector itself.
   The cell structure cp is not freed here. */
void mawk_repl_destroy(mawk_state_t *MAWK, register mawk_cell_t *cp)
{
	mawk_string_t **piece;
	unsigned left;

	if (cp->type == C_REPL) {
		free_STRING(string(cp));
		return;
	}

	/* a C_REPLV: NULL entries stand for '&' and own nothing */
	piece = (mawk_string_t **) cp->ptr;
	for (left = cp->d.vcnt; left != 0; left--, piece++) {
		if (*piece) {
			free_STRING(*piece);
		}
	}
	mawk_zfree(MAWK, cp->ptr, cp->d.vcnt * sizeof(mawk_string_t *));
}
/* Copy a C_REPLV cell into another mawk_cell_t.
   target receives a freshly allocated piece vector of the same length;
   the pieces themselves are shared, each non-NULL one gaining a
   reference. Returns target. */
mawk_cell_t *mawk_replv_cpy(mawk_state_t *MAWK, mawk_cell_t *target, const mawk_cell_t *source)
{
	mawk_string_t **dst, **src;
	unsigned total, idx;

	target->type = C_REPLV;
	total = target->d.vcnt = source->d.vcnt;
	target->ptr = (PTR) mawk_zmalloc(MAWK, total * sizeof(mawk_string_t *));

	dst = (mawk_string_t **) target->ptr;
	src = (mawk_string_t **) source->ptr;
	for (idx = 0; idx < total; idx++) {
		if (src[idx])
			src[idx]->ref_cnt++;
		dst[idx] = src[idx];
	}
	return target;
}
/* Return the compiled replacement cell (C_REPL or C_REPLV) for sval.
   MAWK->repl_list is searched for a node with the same source string; a
   hit is moved to the front of the list. On a miss a new node is
   created, takes a reference on sval, is compiled with REPL_compile()
   and inserted at the front.
*/
mawk_cell_t *mawk_repl_compile(mawk_state_t *MAWK, mawk_string_t *sval)
{
	REPL_NODE *node;
	REPL_NODE *prev = (REPL_NODE *) 0;
	const char *text = sval->str;

	for (node = MAWK->repl_list; node; prev = node, node = node->link) {
		if (strcmp(text, node->sval->str) != 0)
			continue;
		if (!prev)
			return node->cp;	/* hit is already the list head */
		prev->link = node->link;	/* unlink, re-inserted below */
		break;
	}

	if (!node) {
		/* no cached entry: build a new one */
		node = MAWK_ZMALLOC(MAWK, REPL_NODE);
		node->sval = sval;
		sval->ref_cnt++;
		node->cp = REPL_compile(MAWK, sval);
	}

	/* put the node at the front of the list */
	node->link = MAWK->repl_list;
	MAWK->repl_list = node;
	return node->cp;
}
/* Return the source string for a mawk_cell_t of type C_REPL or C_REPLV;
   this is only used by mawk_da().

   MAWK->repl_list is scanned for a node whose compiled cell matches cp:
   a C_REPL matches on the string pointer, anything else is matched
   against C_REPLV nodes with the same piece count and identical piece
   pointers. Returns NULL when nothing matches (DEBUG builds call
   mawk_bozo() first). */
char *mawk_repl_uncompile(mawk_state_t *MAWK, mawk_cell_t *cp)
{
	register REPL_NODE *p;

	if (cp->type == C_REPL) {
		for (p = MAWK->repl_list; p; p = p->link) {
			if (p->cp->type == C_REPL && p->cp->ptr == cp->ptr)
				return p->sval->str;
		}
	}
	else {
		size_t nbytes = cp->d.vcnt * sizeof(mawk_string_t *);

		for (p = MAWK->repl_list; p; p = p->link) {
			/* the piece counts must agree before comparing the vectors:
			   otherwise memcmp() reads past a shorter cached vector, and
			   a longer one sharing a prefix ("&" vs. "&&") would match */
			if (p->cp->type == C_REPLV
					&& p->cp->d.vcnt == cp->d.vcnt
					&& memcmp(cp->ptr, p->cp->ptr, nbytes) == 0)
				return p->sval->str;
		}
	}
#ifdef DEBUG
	mawk_bozo(MAWK, "unable to uncompile an repl");
#endif
	return NULL;
}
/*
   Convert a C_REPLV cell to a C_REPL in place, substituting sval for
   every '&' (NULL) piece.

   The pieces are concatenated into a new string obtained from
   mawk_new_STRING0(); each piece is released afterwards and the piece
   vector is freed. Returns cp.
*/
mawk_cell_t *mawk_replv_to_repl(mawk_state_t *MAWK, mawk_cell_t *cp, mawk_string_t *sval)
{
	mawk_string_t **pieces = (mawk_string_t **) cp->ptr;
	unsigned npieces = cp->d.vcnt;
	unsigned total = 0;
	unsigned k;
	char *out;

#ifdef DEBUG
	if (cp->type != C_REPLV)
		mawk_bozo(MAWK, "not replv");
#endif

	/* pass 1: fill the '&' slots with sval and add up the final length */
	for (k = 0; k < npieces; k++) {
		if (!pieces[k]) {
			pieces[k] = sval;
			sval->ref_cnt++;
		}
		total += pieces[k]->len;
	}

	cp->type = C_REPL;
	cp->ptr = (PTR) mawk_new_STRING0(MAWK, total);

	/* pass 2: copy every piece into the result, then drop its reference */
	out = string(cp)->str;
	for (k = 0; k < npieces; k++) {
		mawk_string_t *piece = pieces[k];

		memcpy(out, piece->str, piece->len);
		out += piece->len;
		free_STRING(piece);
	}

	mawk_zfree(MAWK, pieces, npieces * sizeof(mawk_string_t *));
	return cp;
}