/* Origin: libmawk, tag debian/1.0.0-1, file src/libmawk/fin_exec.c */

/********************************************
fin_exec.c

libmawk changes (C) 2009-2013, Tibor 'Igor2' Palinkas;
based on mawk code coming with the below copyright:

copyright 1991, 1992.  Michael D. Brennan

This is a source file for mawk, an implementation of
the AWK programming language.

Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
#include "conf.h"
#include <stdlib.h>
#include <string.h>
#include "mawk.h"
#include "fin.h"
#include "memory.h"
#include "bi_vars.h"
#include "field.h"
#include "symtype.h"
#include "scan.h"
#include "vio.h"
#include "init.h"
#include "vars.h"
#include "cell.h"
#include "files.h"

#ifndef	  NO_FCNTL_H
#include <fcntl.h>
#endif

/* MAWK->main_input is the FILE_NODE of the main input stream;
   NULL (== 0) means it has never been opened */

/* Make standard input the main input stream.
   - FILENAME is set to the string "-"
   - FNR and the runtime record counter MAWK->rt_fnr are reset to zero
   - "/dev/stdin" is looked up as an input file (F_IN), stored in
     MAWK->main_input, and its input is flagged MAWK_INPF_MAIN. */
static void set_main_to_stdin(mawk_state_t * MAWK)
{
	/* FILENAME = "-" */
	mawk_cell_destroy(MAWK, FILENAME);
	FILENAME->type = C_STRING;
	FILENAME->ptr = (PTR) mawk_new_STRING(MAWK, "-");

	/* FNR = 0 */
	mawk_cell_destroy(MAWK, FNR);
	FNR->type = C_NUM;
	FNR->d.dval = MAWK_NUM_ZERO;
	MAWK->rt_fnr = 0;

	MAWK->main_input = mawk_file_find_(MAWK, "/dev/stdin", F_IN, 1);
#warning TODO: abstract
	/* NOTE(review): the lookup result is dereferenced without a NULL check;
	   presumably looking up stdin cannot fail - confirm */
	MAWK->main_input->fin->flags |= MAWK_INPF_MAIN;
}

/* Close the current main input (if any) and open the next command line
   file operand.

   Walks ARGV from MAWK->argi up to (a private copy of) ARGC, skipping
   deleted elements, empty strings and operands that
   mawk_is_cmdline_assign() handles as assignments. The first remaining
   operand is opened as the new main input; FILENAME is set to it and
   FNR is reset to zero. Failing to open it is reported and ends the run
   with exit value 2.

   open_flag: non-zero when called by mawk_FINopen_main(); in that case
              running out of operands falls back to standard input.

   Returns the input of the new main stream. When no operand is left and
   open_flag is zero, a static "dead" input is returned whose flags mark
   EOF and no more inputs. */
static mawk_input_t *next_main(mawk_state_t *MAWK, int open_flag)
{
	mawk_cell_t *cp;
	mawk_cell_t argc;										/* copy of ARGC */
	mawk_cell_t c_argi;									/* cell copy of argi */
	mawk_cell_t argval;									/* copy of ARGV[c_argi] */

	argval.type = C_NOINIT;
	c_argi.type = C_NUM;

	if (MAWK->main_input != NULL) {
		mawk_file_close_(MAWK, MAWK->main_input);
		MAWK->main_input = NULL;
	}
	/* FILENAME and FNR don't change unless we really open
	   a new file */

	/* make a copy of ARGC to avoid side effect */
	mawk_cellcpy(MAWK, &argc, ARGC);
	if (argc.type != C_NUM)
		mawk_cast1_to_num(MAWK, &argc);

	while (MAWK->argi < argc.d.dval) {
		c_argi.d.dval = MAWK->argi;
		MAWK->argi += MAWK_NUM_ONE;

		if ((mawk_array_find(MAWK, MAWK->Argv, &c_argi, &argval, NO_MAWK_CREATE)) == 0)
			continue;									/* it's deleted */

		/* argval is our own copy, so we can cast w/o side effect */
		cp = &argval;
		if (cp->type < C_STRING)
			mawk_cast1_to_str(MAWK, cp);

		/* skip "" and command line assignments (the latter is only tried
		   on non-empty operands) */
		if (string(cp)->len == 0 || mawk_is_cmdline_assign(MAWK, string(cp)->str)) {
			/* Release our copy and put the cell back to its initial state
			   before the next lookup overwrites it; otherwise only the
			   last skipped operand would get released after the loop. */
			mawk_cell_destroy(MAWK, cp);
			cp->type = C_NOINIT;
			continue;
		}

		/* try to open it -- we used to continue on failure,
		   but posix says we should quit */
		if ((MAWK->main_input = mawk_file_find(MAWK, string(cp), F_IN, 1)) == NULL) {
			mawk_errmsg(MAWK, errno, "cannot open %s", string(cp)->str);
			mawk_exitval(MAWK, 2, NULL);
		}
#warning TODO abstract this
		MAWK->main_input->fin->flags |= MAWK_INPF_MAIN;

		/* success -- set FILENAME and FNR */
		mawk_cell_destroy(MAWK, FILENAME);
		mawk_cellcpy(MAWK, FILENAME, cp);
		free_STRING(string(cp));					/* drop our own copy of the name */
		mawk_cell_destroy(MAWK, FNR);
		FNR->type = C_NUM;
		FNR->d.dval = MAWK_NUM_ZERO;
		MAWK->rt_fnr = 0;

		return MAWK->main_input->fin;
	}
	/* failure: no operand could be opened */
	mawk_cell_destroy(MAWK, &argval);

	if (open_flag) {
		/* all arguments were null or assignment */
		set_main_to_stdin(MAWK);
		return MAWK->main_input->fin;
	}

	/* real failure */
	{
		/* this is how we mark EOF on main_fin  */
		static mawk_input_t dead_main = { NULL, NULL, NULL, 0, 0,
			MAWK_INPF_EOF | MAWK_INPF_DEAD_NO_FREE | MAWK_INPF_NO_MORE_INPUTS};

		MAWK->main_input = mawk_file_register_nofin(MAWK, "DEAD_MAIN", F_IN, NULL);
		MAWK->main_input->fin = &dead_main;
		return MAWK->main_input->fin;
		/* since MAWK_INPF_MAIN is not set, mawk_FINgets won't call next_main() */
	}
}


/* Gets the main input stream going; called once.
   It is called after the execution of the BEGIN block
   unless there is a getline inside BEGIN {}.

   With ARGC equal to one the main input is standard input; otherwise
   the first usable command line operand is opened via next_main().
*/
void mawk_FINopen_main(mawk_state_t * MAWK)
{
	mawk_cell_t arg_count;

	/* work on a private copy so that the numeric cast leaves ARGC alone */
	mawk_cellcpy(MAWK, &arg_count, ARGC);
	if (arg_count.type != C_NUM)
		mawk_cast1_to_num(MAWK, &arg_count);

	if (arg_count.d.dval != MAWK_NUM_ONE) {
		/* there are operands: let next_main() pick the first usable one */
		next_main(MAWK, 1);
		return;
	}

	/* no operands at all */
	set_main_to_stdin(MAWK);
}

/* Check whether the command line operand s has the form name=value and,
   if it does, perform the assignment.

   name must start with a character whose scan code is SC_IDCHAR and may
   continue with SC_IDCHAR or SC_DIGIT characters, immediately followed
   by '='. Escape sequences in value are processed before it is stored.
   s is modified temporarily (the '=' is overwritten with NUL while the
   variable is looked up) but is restored before returning.

   Returns 0 if s does not look like an assignment (the caller may treat
   it as a file name), 1 if it was consumed as an assignment. */
int mawk_is_cmdline_assign(mawk_state_t *MAWK, char *s)
{
	char *p;
	int c;
	mawk_cell_t *cp;
	unsigned len;
	mawk_cell_t *fp = NULL;				/* filled in by mawk_create_var(); non-NULL means the value has to be moved to a field */

	if (MAWK->scan_code[*(unsigned char *) s] != SC_IDCHAR)
		return 0;

	/* skip over the rest of the identifier */
	p = s + 1;
	while ((c = MAWK->scan_code[*(unsigned char *) p]) == SC_IDCHAR || c == SC_DIGIT)
		p++;

	if (*p != '=')
		return 0;

	/* temporarily cut s down to the variable name */
	*p = 0;

	cp = mawk_create_var(MAWK, s, &fp);
	if (cp == NULL) {
		mawk_rt_error(MAWK, "cannot command line assign to %s\n\ttype clash or keyword", s);
		/* In case the error report returns: there is no cell to assign
		   to, so do not fall through and dereference cp. Restore the
		   operand and report it as consumed - it has assignment syntax,
		   so it is not a file name. */
		*p = '=';
		return 1;
	}

	/* we need to keep ARGV[i] intact */
	*p++ = '=';
	len = strlen(p) + 1;
	/* posix says escape sequences are on from command line */
	p = mawk_rm_escape(MAWK, strcpy((char *) mawk_zmalloc(MAWK, len), p));
	cp->ptr = (PTR) mawk_new_STRING(MAWK, p);
	mawk_zfree(MAWK, p, len);
	mawk_check_strnum(MAWK, cp);	/* sets cp->type */
	if (fp) {											/* move it from cell to pfield[] */
		mawk_field_assign(MAWK, fp, cp);
		free_STRING(string(cp));
	}
	return 1;
}

/* Find the first record separator in the NUL terminated string str,
   according to the current MAWK->rs_shadow.

   at_end:    non-zero if no more data can follow str (input is at EOF)
   match_len: out; length of the separator found (only meaningful when
              the return value is not NULL)

   Returns a pointer to the start of the separator within str, or NULL
   if there is no (usable) separator. */
static char *find_sep(mawk_state_t *MAWK, int at_end, char *str, unsigned int *match_len)
{
	/* NULL by default so that an unknown separator type reports
	   "not found" instead of returning an indeterminate pointer */
	char *start = NULL;

	/* set up split rule (match pattern and set match length) */
	switch (MAWK->rs_shadow.type) {
	case SEP_CHAR: /* single char sep */
		start = strchr(str, MAWK->rs_shadow.c);
		*match_len = 1;
		break;

	case SEP_STR: /* static string sep */
		*match_len = ((mawk_string_t *) MAWK->rs_shadow.ptr)->len;
		start = mawk_str_str(str, ((mawk_string_t *) MAWK->rs_shadow.ptr)->str, *match_len);
		break;

	/* regex or MLR sep */
	case SEP_MLR:
	case SEP_RE:
		start = mawk_re_pos_match(MAWK, str, MAWK->rs_shadow.ptr, match_len);
		/* if the match is at the end, there might still be
		   more to match in the file */
		if (start && start[*match_len] == 0 && !at_end)
			start = NULL;
		break;

	default:
		mawk_bozo(MAWK, "type of rs_shadow");
		break;
	}
	return start;
}

/* Return one input record, as determined by RS, from the input file fn.

   The record is returned in place: it points into the input buffer and
   is terminated by overwriting the first character of the separator
   with NUL. *len_p is set to the length of the record.

   Returns:
    - pointer to the record on success
    - (char *)mawk_FIN_nomore if mawk_fillbuff() reports that there is
      nothing more to read right now and the buffer holds at most a
      partial record
    - NULL on end of input (no further input to switch to), on a
      negative result from mawk_fillbuff(), or when MAWK->do_exit is set
*/
char *mawk_FINgets(mawk_state_t *MAWK, FILE_NODE *fn, unsigned *len_p)
{
	char *sep_at;
	unsigned match_len, available;
	long r;

	while(!MAWK->do_exit) { /* restart */
		mawk_input_t *fin = fn->fin;

		if (fin->flags & MAWK_INPF_NO_MORE_INPUTS)
			return NULL;

		/* at least try reading some more before giving up */
		fin->flags &= ~MAWK_INPF_NO_MORE;

		if ((fin->used == 0) && (fin->flags & MAWK_INPF_EOF)) {
			if (fin->flags & MAWK_INPF_MAIN) {
				/* NOTE(review): next_main() closes the old main input and
				   stores a new FILE_NODE in MAWK->main_input, while the
				   restart below reads fn->fin again - confirm fn is still
				   valid (and refers to the new input) after the close */
				fin = next_main(MAWK, 0);
				if (fin == NULL)
					return NULL;
				continue; /* restart */
			}
			else {
				/* eof on a non-main file: no chance to get another file, report eof and exit */
				return NULL;
			}
		}

		/* have to retry finding a sep even if the buffer is the same partial
		   buffer we had last time: RS may have changed between the two calls!
		   The only exception is when buffer is empty for sure */
		if ((fin->used > 0) && (*fin->next != '\0')) {
			sep_at = find_sep(MAWK, (fin->flags & MAWK_INPF_EOF), fin->next, &match_len);

			/* found the separator pattern: cut the string there and return
			   the record from its beginning */
			if (sep_at != NULL) {
				char *start = fin->next;
				/* the easy and normal case: found a record */
				*sep_at = 0;
				/* the separator search works on a NUL terminated string, so
				   there is no NUL between start and sep_at: the pointer
				   difference is the string length */
				*len_p = sep_at - start;
				fin->next = sep_at + match_len;
				if (fin->next - fin->buf >= fin->used) {
					/* the buffer got empty - update things to make it faster */
					fin->next = fin->buf;
					fin->used = 0;
				}
				return start;
			}

			/* no sep, but eof... */
			if (fin->flags & MAWK_INPF_EOF) {
				char *s;
				/* ...last line without a record terminator! Return it anyway */
				*len_p = r = strlen(fin->next);
				s = fin->next + r;
				/* in MLR mode a single trailing newline is not part of the
				   record; check the length first so that s[-1] is never
				   read in front of the record */
				if (MAWK->rs_shadow.type == SEP_MLR && r != 0 && s[-1] == '\n') {
					(*len_p)--;
					*--s = 0;
				}
				s = fin->next;
				fin->next = fin->buf;
				fin->used = 0;
				return s;
			}

			/* didn't find a separator and we are not at the end of the file */
			if (fin->next != fin->buf) {
				int new_len;
				/* we are deep into the buffer, the buffer ends with a partial record.
				   Move it to the beginning of the buffer */
				new_len = fin->used - (fin->next - fin->buf);
				if (new_len > 0)
					memmove(fin->buf, fin->next, new_len);
				fin->used = new_len;
				fin->next = fin->buf;
				/* The move does not carry the terminating NUL along. If the
				   read below adds nothing (EOF or no more data for now) the
				   next separator search would run into the stale bytes left
				   behind the partial record, so terminate it here. */
				if (new_len >= 0)
					fin->buf[new_len] = '\0';
			}
			/* ... so try to read some more data */
		}

		available = fin->alloced - fin->used;
		if (available < BUFFSZ/2) {
			/* have to grow; buf may move, so remember next as an offset */
			int next_offs = fin->next - fin->buf;

			fin->buf = mawk_zrealloc(MAWK, fin->buf, fin->alloced, fin->alloced + BUFFSZ);
			fin->alloced += BUFFSZ;
			available += BUFFSZ;
			fin->next = fin->buf + next_offs;
		}
		/* read at most available-1 bytes: leave room for the terminating NUL */
		r = mawk_fillbuff(MAWK, fin, fin->buf + fin->used, available-1, MAWK->interactive_flag);
		if (r == 0) {
			fin->flags |= MAWK_INPF_EOF;
			continue; /* may have a next file (???might be main) */
		}
		else if (r == mawk_FIN_nomore) {
			/* no more to read now and we had at most a partial record in buffer */
			return (char *)mawk_FIN_nomore;
		}
		else if (r < 0) {
			return NULL;
		}

		fin->used += r;
		fin->buf[fin->used] = '\0';

		if (fin->flags & MAWK_INPF_START) {
			if (MAWK->rs_shadow.type == SEP_MLR) {
				char *s;
				/* trim blank lines from front of file */
#warning TODO: probably accept \r as well
				for(s = fin->next; *s == '\n'; s++) ;
				if (*s == '\0') {
					/* emptied the buffer with all the \n's... so get back to initial state */
					fin->next = fin->buf;
					fin->used = 0;
					continue; /* restart */
				}
				/* found a non-'\n', use that as a potential start of the next record */
				fin->flags &= ~MAWK_INPF_START; /* we are not at the start anymore */
				fin->next = s;
				continue; /* restart: read on */
			}
			else
				fin->flags &= ~MAWK_INPF_START;
		}
	}

	/* get here if MAWK -> do_exit */
	return NULL;
}