Codebase list libmawk / debian/1.0.0-1 src / libmawk / split.c
debian/1.0.0-1

Tree @debian/1.0.0-1 (Download .tar.gz)

split.c @debian/1.0.0-1raw · history · blame

/********************************************
split.c

libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
based on mawk code coming with the below copyright:

copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the AWK programming language.

Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/

/* For every kind of split, up to MAX_SPLIT fields go into
   split_buff[]; the rest go onto split_ov_list (the split
   overflow list).

   We can split one of three ways:
     (1) By space:
	 mawk_space_split() and space_ov_split()
     (2) By regular expression:
	 mawk_re_split()    and re_ov_split()
     (3) By "" (null -- split into characters)
	 mawk_null_split() and null_ov_split()
*/

#define	 TEMPBUFF_GOES_HERE

#include "mawk.h"
#include "symtype.h"
#include "bi_vars.h"
#include "bi_funct.h"
#include "memory.h"
#include "scan.h"
#include "regexp.h"
#include "field.h"

/* Forward declarations of the overflow helpers defined further down.
   Each one is called by its public counterpart (mawk_re_split(),
   mawk_space_split(), mawk_null_split()) once that function stops
   filling split_buff[]; it stores the remaining pieces on
   MAWK->split_ov_list and returns how many it stored. */
static int re_ov_split(mawk_state_t *, char *, PTR);
static int space_ov_split(mawk_state_t *, char *, char *);
static int null_ov_split(mawk_state_t *, char *);

/* Split the string s (slen bytes long) on runs of characters whose
   MAWK->scan_code[] entry is SC_SPACE.  Every piece is copied into a
   newly allocated STRING and its pointer is stored in split_buff[];
   when the quota for split_buff[] is used up, the rest of the string
   is handed to space_ov_split().

   While a field is being scanned, s[slen] is temporarily overwritten
   with a ' ' sentinel and reset to 0 afterwards, so the buffer must be
   writable at that position.

   Returns the total number of pieces. */

int mawk_space_split(mawk_state_t *MAWK, register char *s, unsigned slen)
{
	char *back = s + slen;
	/* same quota as filling split_buff[] three fields at a time */
	int limit = (MAX_SPLIT / 3) * 3;
	int i;
	int len;
	char *q;
	mawk_string_t *sval;

/* These two macros work on the local names MAWK, s and back; they are
   also used by space_ov_split() below. */
#define EAT_SPACE()   while ( MAWK->scan_code[*(unsigned char*)s] ==\
			      SC_SPACE )  s++
#define EAT_NON_SPACE()	  \
    *back = ' ' ; /* sentinel */\
    while ( MAWK->scan_code[*(unsigned char*)s] != SC_SPACE )	 s++ ;\
    *back = 0


	for (i = 0; i < limit; i++) {
		EAT_SPACE();
		if (*s == 0)
			return i;									/* nothing left to split */

		/* q marks the front of the field, s runs to its end */
		q = s++;
		EAT_NON_SPACE();

		len = s - q;
		sval = mawk_new_STRING0(MAWK, len);
		split_buff[i] = sval;
		memcpy(sval->str, q, len);
	}

	/* split_buff[] quota exhausted: the overflow list takes the rest */
	return i + space_ov_split(MAWK, s, back);
}

/* Continue a SPACE split on the remainder of the string starting at s.
   Each further field is copied into a new STRING that is appended to a
   singly linked list of SPLIT_OV nodes; the finished list is published
   through MAWK->split_ov_list (a null list if no field was found).
   back points at the position where EAT_NON_SPACE() places its
   sentinel.  Returns the number of fields put on the list. */
static int space_ov_split(mawk_state_t *MAWK, register char *s, char *back)
{
	SPLIT_OV head;								/* dummy node in front of the list */
	SPLIT_OV *last = &head;
	SPLIT_OV *node;
	char *start;
	unsigned flen;
	int n = 0;

	for (;;) {
		EAT_SPACE();
		if (*s == 0)
			break;

		start = s++;
		EAT_NON_SPACE();

		node = MAWK_ZMALLOC(MAWK, SPLIT_OV);
		last->link = node;
		last = node;

		flen = s - start;
		node->sval = mawk_new_STRING0(MAWK, flen);
		memcpy(node->sval->str, start, flen);
		n++;
	}

	/* terminate the list and drop the dummy head */
	last->link = (SPLIT_OV *) 0;
	MAWK->split_ov_list = head.link;
	return n;
}

/* Find the first match of re in s that has positive length.
   Zero-length matches reported by mawk_REmatch() are skipped by
   advancing one character and searching again.  On success the match
   start is returned and its length is left in *lenp; a null pointer is
   returned when there is no such match. */
char *mawk_re_pos_match(mawk_state_t *MAWK, register char *s, PTR re, unsigned *lenp)
{
	for (;;) {
		s = mawk_REmatch(MAWK, s, re, lenp, 0);
		if (!s)
			return (char *) 0;				/* no match at all */
		if (*lenp)
			return s;									/* positive-length match */
		if (*s == 0)
			return (char *) 0;				/* empty match at end of string */
		s++;												/* step past the empty match */
	}
}

/* Split s on positive-length matches of the regular expression re.
   The text in front of each match is copied into a new STRING stored
   in split_buff[]; the text after the last match becomes the final
   piece.  When the quota for split_buff[] is used up, the rest of the
   string is handed to re_ov_split().
   Returns the total number of pieces. */
int mawk_re_split(mawk_state_t *MAWK, char *s, PTR re)
{
	/* same quota as filling split_buff[] three fields at a time */
	int limit = (MAX_SPLIT / 3) * 3;
	int n = 0;
	unsigned mlen, len;
	char *hit;
	mawk_string_t *piece;

	while (n < limit) {
		hit = mawk_re_pos_match(MAWK, s, re, &mlen);
		if (!hit) {
			/* no separator left: whatever remains is the last piece */
			split_buff[n++] = mawk_new_STRING(MAWK, s);
			return n;
		}

		len = hit - s;
		piece = mawk_new_STRING0(MAWK, len);
		split_buff[n++] = piece;
		memcpy(piece->str, s, len);
		s = hit + mlen;							/* resume after the separator */
	}

	/* split_buff[] quota exhausted: the overflow list takes the rest */
	return n + re_ov_split(MAWK, s, re);
}

/*
  split_buff[] has overflowed: split the rest of s on re and put
  the pieces on MAWK->split_ov_list.  The text after the last match
  always forms one final piece, so the list holds at least one node.
  Returns the number of pieces put on the list.
*/

static int re_ov_split(mawk_state_t *MAWK, char *s, PTR re)
{
	SPLIT_OV head;								/* dummy node in front of the list */
	SPLIT_OV *last = &head;
	SPLIT_OV *node;
	char *hit;
	unsigned mlen, len;
	int n = 0;

	do {
		hit = mawk_re_pos_match(MAWK, s, re, &mlen);

		node = MAWK_ZMALLOC(MAWK, SPLIT_OV);
		last->link = node;
		last = node;
		n++;

		if (hit) {
			/* piece in front of the separator */
			len = hit - s;
			node->sval = mawk_new_STRING0(MAWK, len);
			memcpy(node->sval->str, s, len);
			s = hit + mlen;
		}
		else {
			/* no separator left: the remainder is the final piece */
			node->sval = mawk_new_STRING(MAWK, s);
		}
	} while (hit);

	last->link = (SPLIT_OV *) 0;
	MAWK->split_ov_list = head.link;

	return n;
}


/* Split s into single characters: each character becomes a new
   one-character STRING stored in split_buff[].  After MAX_SPLIT
   pieces the rest of the string is handed to null_ov_split().
   Returns the total number of pieces. */
int mawk_null_split(mawk_state_t *MAWK, char *s)
{
	int n;												/* pieces so far, also the split_buff[] index */
	mawk_string_t *piece;

	for (n = 0; *s != 0; n++, s++) {
		if (n == MAX_SPLIT)
			return n + null_ov_split(MAWK, s);

		piece = mawk_new_STRING0(MAWK, 1);
		piece->str[0] = *s;
		split_buff[n] = piece;
	}
	return n;
}

/* Continue a character-by-character split on the remainder of s:
   each character becomes a one-character STRING appended to a linked
   list of SPLIT_OV nodes, which is then published through
   MAWK->split_ov_list.  Returns the number of pieces on the list. */
static int null_ov_split(mawk_state_t *MAWK, char *s)
{
	SPLIT_OV head;								/* dummy node in front of the list */
	SPLIT_OV *last = &head;
	SPLIT_OV *node;
	int n;

	for (n = 0; *s != 0; n++) {
		node = MAWK_ZMALLOC(MAWK, SPLIT_OV);
		last->link = node;
		last = node;

		node->sval = mawk_new_STRING0(MAWK, 1);
		node->sval->str[0] = *s++;
	}

	last->link = (SPLIT_OV *) 0;
	MAWK->split_ov_list = head.link;
	return n;
}


/*  split(s, X, r)
    split s into array X on r

    entry: sp[0] holds r
	   sp[-1] pts at X
	   sp[-2] holds s

    exit:  the returned pointer is the entry sp minus 2; that cell
	   has been turned into a C_NUM holding the number of pieces.
	   The string it held before is released with free_STRING().
*/
mawk_cell_t *mawk_bi_split(mawk_state_t *MAWK, register mawk_cell_t *sp)
{
	/* The number of pieces.  Starts at 0 so that an empty string, and
	   the default branch below should mawk_bozo() ever return, both
	   yield a defined count instead of an indeterminate value. */
	int cnt = 0;

	if (sp->type < C_RE)
		mawk_cast_for_split(MAWK, sp);
	/* can be C_RE, C_SPACE or C_SNULL */
	sp -= 2;
	if (sp->type < C_STRING)
		mawk_cast1_to_str(MAWK, sp);

	if (string(sp)->len != 0) {		/* otherwise there is nothing to split */
		switch ((sp + 2)->type) {
		case C_RE:
			cnt = mawk_re_split(MAWK, string(sp)->str, (sp + 2)->ptr);
			break;

		case C_SPACE:
			cnt = mawk_space_split(MAWK, string(sp)->str, string(sp)->len);
			break;

		case C_SNULL:							/* split on empty string */
			cnt = mawk_null_split(MAWK, string(sp)->str);
			break;

		default:
			mawk_bozo(MAWK, "bad splitting cell in bi_split");
			break;
		}
	}

	/* replace the source string on the stack by the piece count */
	free_STRING(string(sp));
	sp->type = C_NUM;
	sp->d.dval = (mawk_num_t) cnt;

	/* NOTE(review): presumably moves the cnt pieces from split_buff[] and
	   split_ov_list into the array X -- confirm against mawk_array_load() */
	mawk_array_load(MAWK, (mawk_array_t) (sp + 1)->ptr, cnt);

	return sp;
}