/********************************************
split.c
libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
based on mawk code coming with the below copyright:
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the AWK programming language.
Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
/* For every kind of split, the first MAX_SPLIT fields (at most) go into
split_buff[]; any further fields go onto split_ov_list (the split
overflow list).
We can split in one of three ways:
(1) By space:
mawk_space_split() and space_ov_split()
(2) By regular expression:
mawk_re_split() and re_ov_split()
(3) By "" (null -- split into characters)
mawk_null_split() and null_ov_split()
*/
#define TEMPBUFF_GOES_HERE
#include "mawk.h"
#include "symtype.h"
#include "bi_vars.h"
#include "bi_funct.h"
#include "memory.h"
#include "scan.h"
#include "regexp.h"
#include "field.h"
/* Forward declarations of the overflow helpers defined later in this file.
   Each one is called by the matching mawk_*_split() function once
   split_buff[] has been filled; it stores the remaining pieces on
   MAWK->split_ov_list and returns how many pieces it produced. */
static int re_ov_split(mawk_state_t *, char *, PTR);
static int space_ov_split(mawk_state_t *, char *, char *);
static int null_ov_split(mawk_state_t *, char *);
/* Split the string s (slen bytes long) on runs of SPACE characters.
   Every piece is copied into a freshly allocated string whose pointer is
   stored in split_buff[]; once split_buff[] is full the remainder of the
   input is handed to space_ov_split().
   The byte at s[slen] is briefly overwritten with a blank while scanning
   (see EAT_NON_SPACE) and is set back to 0 afterwards.
   Returns the total number of pieces. */

/* Advance s over consecutive bytes whose scan code is SC_SPACE.
   Expects `s` and `MAWK` to be in scope at the point of use. */
#define EAT_SPACE() \
	while (MAWK->scan_code[*(unsigned char *)s] == SC_SPACE) \
		s++

/* Advance s to the next byte whose scan code is SC_SPACE.
   A blank is planted at *back so the scan cannot run off the end of the
   string (this relies on ' ' being classified as SC_SPACE); *back is
   reset to 0 once the scan has stopped.
   Expects `s`, `back` and `MAWK` to be in scope at the point of use. */
#define EAT_NON_SPACE() \
	do { \
		*back = ' '; /* sentinel */ \
		while (MAWK->scan_code[*(unsigned char *)s] != SC_SPACE) \
			s++; \
		*back = 0; \
	} while (0)

int mawk_space_split(mawk_state_t *MAWK, register char *s, unsigned slen)
{
	/* split_buff[] is filled in whole groups of three pieces, so the number
	   of pieces stored there is MAX_SPLIT rounded down to a multiple of 3 */
	const int in_buff_max = (MAX_SPLIT / 3) * 3;
	char *back = s + slen;   /* one past the last byte of the input */
	char *start;             /* first byte of the piece being scanned */
	mawk_string_t *piece;
	int plen;
	int n = 0;               /* pieces stored in split_buff[] so far */

	while (n < in_buff_max) {
		EAT_SPACE();
		if (*s == 0)
			return n;        /* input exhausted, nothing overflowed */

		start = s++;
		EAT_NON_SPACE();

		plen = s - start;
		piece = mawk_new_STRING0(MAWK, plen);
		split_buff[n++] = piece;
		memcpy(piece->str, start, plen);
	}

	/* split_buff[] is full: the rest goes onto the overflow list */
	return n + space_ov_split(MAWK, s, back);
}
/* Continue a SPACE split after split_buff[] has been filled.
   s points at the unscanned rest of the input and back at the position of
   its terminating 0 (used by EAT_NON_SPACE for its sentinel).
   Each remaining piece is copied into a new string and appended to a
   singly linked list of SPLIT_OV nodes; the list is published through
   MAWK->split_ov_list (an empty list is stored as a null pointer).
   Returns the number of pieces placed on the list. */
static int space_ov_split(mawk_state_t *MAWK, register char *s, char *back)
{
	SPLIT_OV head;                     /* list anchor; only head.link is used */
	register SPLIT_OV *last = &head;   /* most recently appended node */
	SPLIT_OV *node;
	char *start;
	unsigned plen;
	int pieces = 0;

	for (;;) {
		EAT_SPACE();
		if (*s == 0)
			break;                     /* no more pieces */

		start = s++;
		EAT_NON_SPACE();

		node = MAWK_ZMALLOC(MAWK, SPLIT_OV);
		last->link = node;
		last = node;

		plen = s - start;
		node->sval = mawk_new_STRING0(MAWK, plen);
		memcpy(node->sval->str, start, plen);
		pieces++;
	}

	last->link = (SPLIT_OV *) 0;       /* terminate the list */
	MAWK->split_ov_list = head.link;
	return pieces;
}
/* Search s for a match of the regular expression re, accepting only
   matches of positive length.
   mawk_REmatch() is called repeatedly; whenever it reports a match with
   *lenp == 0 the search is retried one byte further on, unless the match
   sits on the terminating 0 of the string.
   Returns the pointer delivered by mawk_REmatch() for the first match with
   non-zero *lenp, or a null pointer when there is none. */
char *mawk_re_pos_match(mawk_state_t *MAWK, register char *s, PTR re, unsigned *lenp)
{
	for (;;) {
		s = mawk_REmatch(MAWK, s, re, lenp, 0);
		if (!s)
			return (char *) 0;   /* no match at all */
		if (*lenp != 0)
			return s;            /* usable match */
		if (*s == 0)
			return (char *) 0;   /* empty match at end of string */
		s++;                     /* skip the empty match and try again */
	}
}
/* Split the 0-terminated string s on positive-length matches of the
   regular expression re.
   The text in front of each match is copied into a new string stored in
   split_buff[]; the text after the last match becomes the final piece.
   Once split_buff[] is full the remainder of the input is handed to
   re_ov_split().
   Returns the total number of pieces (always at least 1). */
int mawk_re_split(mawk_state_t *MAWK, char *s, PTR re)
{
	/* split_buff[] is filled in whole groups of three pieces, so the number
	   of pieces stored there is MAX_SPLIT rounded down to a multiple of 3 */
	const int in_buff_max = (MAX_SPLIT / 3) * 3;
	register char *hit;        /* start of the current separator match */
	unsigned hit_len;          /* length of that match */
	unsigned plen;
	mawk_string_t *piece;
	int n = 0;                 /* pieces stored in split_buff[] so far */

	while (n < in_buff_max) {
		hit = mawk_re_pos_match(MAWK, s, re, &hit_len);
		if (!hit) {
			/* no further separator: the rest of s is the last piece */
			split_buff[n++] = mawk_new_STRING(MAWK, s);
			return n;
		}

		plen = hit - s;
		piece = mawk_new_STRING0(MAWK, plen);
		split_buff[n++] = piece;
		memcpy(piece->str, s, plen);

		s = hit + hit_len;     /* resume right after the separator */
	}

	/* split_buff[] is full: the rest goes onto the overflow list */
	return n + re_ov_split(MAWK, s, re);
}
/* Continue a regular-expression split after split_buff[] has been filled.
   The text in front of each further positive-length match of re is copied
   into a new string and appended to a singly linked list of SPLIT_OV
   nodes; the text after the last match is appended as a final piece.
   The list is published through MAWK->split_ov_list.
   Returns the number of pieces placed on the list (always at least 1). */
static int re_ov_split(mawk_state_t *MAWK, char *s, PTR re)
{
	SPLIT_OV head;                     /* list anchor; only head.link is used */
	register SPLIT_OV *last = &head;   /* most recently appended node */
	SPLIT_OV *node;
	char *hit;
	unsigned plen, hit_len;
	int pieces = 0;

	for (;;) {
		hit = mawk_re_pos_match(MAWK, s, re, &hit_len);
		if (!hit)
			break;

		node = MAWK_ZMALLOC(MAWK, SPLIT_OV);
		last->link = node;
		last = node;

		plen = hit - s;
		node->sval = mawk_new_STRING0(MAWK, plen);
		memcpy(node->sval->str, s, plen);

		s = hit + hit_len;             /* resume right after the separator */
		pieces++;
	}

	/* whatever follows the last separator is one more piece */
	node = MAWK_ZMALLOC(MAWK, SPLIT_OV);
	last->link = node;
	node->sval = mawk_new_STRING(MAWK, s);
	node->link = (SPLIT_OV *) 0;       /* terminate the list */

	MAWK->split_ov_list = head.link;
	return pieces + 1;
}
/* Split the 0-terminated string s into single characters.
   Each character is copied into its own new one-byte string stored in
   split_buff[]; if more than MAX_SPLIT characters are present, the
   remainder is handed to null_ov_split().
   Returns the total number of pieces. */
int mawk_null_split(mawk_state_t *MAWK, char *s)
{
	mawk_string_t *one;
	int n;   /* pieces produced so far; also the next split_buff[] slot */

	for (n = 0; *s != 0; n++) {
		if (n == MAX_SPLIT)
			return n + null_ov_split(MAWK, s);   /* split_buff[] is full */

		one = mawk_new_STRING0(MAWK, 1);
		one->str[0] = *s++;
		split_buff[n] = one;
	}
	return n;
}
/* Continue a character-by-character split after split_buff[] has been
   filled.
   Each remaining character of s is copied into its own new one-byte string
   and appended to a singly linked list of SPLIT_OV nodes, which is
   published through MAWK->split_ov_list.
   Returns the number of pieces placed on the list. */
static int null_ov_split(mawk_state_t *MAWK, char *s)
{
	SPLIT_OV head;            /* list anchor; only head.link is used */
	SPLIT_OV *last = &head;   /* most recently appended node */
	SPLIT_OV *node;
	int pieces;

	for (pieces = 0; *s != 0; pieces++) {
		node = MAWK_ZMALLOC(MAWK, SPLIT_OV);
		last->link = node;
		last = node;

		node->sval = mawk_new_STRING0(MAWK, 1);
		node->sval->str[0] = *s++;
	}

	last->link = (SPLIT_OV *) 0;   /* terminate the list */
	MAWK->split_ov_list = head.link;
	return pieces;
}
/* split(s, X, r)
   Split s into array X on r.
   entry: sp[0] holds r
          sp[-1] points at X
          sp[-2] holds s
   The separator cell is cast with mawk_cast_for_split() if its type is
   below C_RE, and s is cast to a string if needed. The pieces are produced
   by the split routine that matches the separator type and are then loaded
   into X with mawk_array_load().
   Returns the cell that held s, rewritten as a C_NUM holding the number of
   pieces. */
mawk_cell_t *mawk_bi_split(mawk_state_t *MAWK, register mawk_cell_t *sp)
{
	/* Number of pieces. Starts at 0 so that it is well defined both for an
	   empty input string and on the error path below, should mawk_bozo()
	   return to the caller instead of terminating. */
	int cnt = 0;

	if (sp->type < C_RE)
		mawk_cast_for_split(MAWK, sp);
	/* the separator can now be C_RE, C_SPACE or C_SNULL */

	sp -= 2;   /* sp now addresses s; the separator is at sp + 2 */
	if (sp->type < C_STRING)
		mawk_cast1_to_str(MAWK, sp);

	if (string(sp)->len != 0) {   /* an empty string yields no pieces */
		switch ((sp + 2)->type) {
		case C_RE:
			cnt = mawk_re_split(MAWK, string(sp)->str, (sp + 2)->ptr);
			break;
		case C_SPACE:
			cnt = mawk_space_split(MAWK, string(sp)->str, string(sp)->len);
			break;
		case C_SNULL:   /* split on empty string */
			cnt = mawk_null_split(MAWK, string(sp)->str);
			break;
		default:
			mawk_bozo(MAWK, "bad splitting cell in bi_split");
			break;
		}
	}

	/* the pieces are copies, so the source string can be released now */
	free_STRING(string(sp));
	sp->type = C_NUM;
	sp->d.dval = (mawk_num_t) cnt;
	mawk_array_load(MAWK, (mawk_array_t) (sp + 1)->ptr, cnt);
	return sp;
}