/********************************************
fin_exec.c
libmawk changes (C) 2009-2013, Tibor 'Igor2' Palinkas;
based on mawk code coming with the below copyright:
copyright 1991, 1992. Michael D. Brennan
This is a source file for mawk, an implementation of
the AWK programming language.
Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
#include "conf.h"
#include <stdlib.h>
#include <string.h>
#include "mawk.h"
#include "fin.h"
#include "memory.h"
#include "bi_vars.h"
#include "field.h"
#include "symtype.h"
#include "scan.h"
#include "vio.h"
#include "init.h"
#include "vars.h"
#include "cell.h"
#include "files.h"
#ifndef NO_FCNTL_H
#include <fcntl.h>
#endif
/* main_input is the FILE_NODE of the main input stream;
   it is NULL (== 0) while that stream has never been opened */
/* Make standard input the main input stream.
   FILENAME is replaced by the string "-", FNR and MAWK->rt_fnr are reset
   to zero, and the file node looked up under the name "/dev/stdin" becomes
   MAWK->main_input with MAWK_INPF_MAIN set on its input. */
static void set_main_to_stdin(mawk_state_t * MAWK)
{
	/* FILENAME = "-" */
	mawk_cell_destroy(MAWK, FILENAME);
	FILENAME->type = C_STRING;
	FILENAME->ptr = (PTR) mawk_new_STRING(MAWK, "-");

	/* FNR = 0, both the awk variable and the runtime counter */
	mawk_cell_destroy(MAWK, FNR);
	FNR->type = C_NUM;
	FNR->d.dval = MAWK_NUM_ZERO;
	MAWK->rt_fnr = 0;

	/* NOTE(review): the lookup result is used without a NULL check, unlike
	   the mawk_file_find() call in next_main() - confirm it can not fail */
	MAWK->main_input = mawk_file_find_(MAWK, "/dev/stdin", F_IN, 1);
	TODO(": abstract")
	MAWK->main_input->fin->flags |= MAWK_INPF_MAIN;
}
/* Open the next command line file as main input.
   Closes the current main input (if any), then walks ARGV from MAWK->argi
   up to ARGC, skipping deleted elements, empty strings and command line
   assignments. The first remaining argument is opened; on success FILENAME
   is set to it, FNR and MAWK->rt_fnr are reset and MAWK_INPF_MAIN is set.

   open_flag: non-zero when called by mawk_FINopen_main(); if no argument
   names a file, stdin becomes the main input in that case.

   Returns the input of the new main stream. When the arguments are
   exhausted and open_flag is zero, a static "dead" input marked
   EOF/NO_MORE_INPUTS is returned. An open failure is reported and passed
   to mawk_exitval() with NULL as the return value. */
static mawk_input_t *next_main(mawk_state_t *MAWK, int open_flag)
{
	mawk_cell_t *cp;
	mawk_cell_t argc; /* copy of ARGC */
	mawk_cell_t c_argi; /* cell copy of argi */
	mawk_cell_t argval; /* copy of ARGV[c_argi] */

	argval.type = C_NOINIT;
	c_argi.type = C_NUM;

	if (MAWK->main_input != NULL)
		mawk_file_close_(MAWK, MAWK->main_input);
	MAWK->main_input = NULL;

	/* FILENAME and FNR don't change unless we really open
	   a new file */

	/* make a copy of ARGC to avoid side effect */
	mawk_cellcpy(MAWK, &argc, ARGC);
	if (argc.type != C_NUM)
		mawk_cast1_to_num(MAWK, &argc);

	while (MAWK->argi < argc.d.dval) {
		c_argi.d.dval = MAWK->argi;
		MAWK->argi += MAWK_NUM_ONE;

		if ((mawk_array_find(MAWK, MAWK->Argv, &c_argi, &argval, NO_MAWK_CREATE)) == 0)
			continue; /* its deleted */

		/* argval is our own copy so we can cast w/o side effect */
		cp = &argval;
		if (cp->type < C_STRING)
			mawk_cast1_to_str(MAWK, cp);

		/* skip "" and command line assignments; drop our reference to the
		   string and mark the cell empty so it is neither leaked nor
		   released a second time by a later lookup or the final destroy */
		if (string(cp)->len == 0 || mawk_is_cmdline_assign(MAWK, string(cp)->str)) {
			mawk_cell_destroy(MAWK, cp);
			cp->type = C_NOINIT;
			continue;
		}

		/* try to open it -- we used to continue on failure,
		   but posix says we should quit */
		if ((MAWK->main_input = mawk_file_find(MAWK, string(cp), F_IN, 1)) == NULL) {
			mawk_errmsg(MAWK, errno, "cannot open %s", string(cp)->str);
			/* the name is not needed past the message */
			mawk_cell_destroy(MAWK, cp);
			cp->type = C_NOINIT;
			mawk_exitval(MAWK, 2, NULL);
		}

		TODO("TODO abstract this")
		MAWK->main_input->fin->flags |= MAWK_INPF_MAIN;

		/* success -- set FILENAME and FNR */
		mawk_cell_destroy(MAWK, FILENAME);
		mawk_cellcpy(MAWK, FILENAME, cp);
		free_STRING(string(cp)); /* FILENAME holds its own reference now */
		mawk_cell_destroy(MAWK, FNR);
		FNR->type = C_NUM;
		FNR->d.dval = MAWK_NUM_ZERO;
		MAWK->rt_fnr = 0;
		return MAWK->main_input->fin;
	}

	/* failure: no argument left that names a file */
	mawk_cell_destroy(MAWK, &argval);

	if (open_flag) {
		/* all arguments were null or assignment */
		set_main_to_stdin(MAWK);
		return MAWK->main_input->fin;
	}

	/* real failure */
	{
		/* this is how we mark EOF on main_fin */
		static mawk_input_t dead_main = { NULL, NULL, NULL, 0, 0,
			MAWK_INPF_EOF | MAWK_INPF_DEAD_NO_FREE | MAWK_INPF_NO_MORE_INPUTS};

		MAWK->main_input = mawk_file_register_nofin(MAWK, "DEAD_MAIN", F_IN, NULL);
		MAWK->main_input->fin = &dead_main;
		return MAWK->main_input->fin;
		/* since MAWK_INPF_MAIN is not set, mawk_FINgets won't call next_main() */
	}
}
/* Start up the main input stream; meant to be called once.
   This happens after the BEGIN block has been executed, unless a getline
   inside BEGIN {} needs the main input earlier.
   With ARGC equal to one stdin is used directly, otherwise the command
   line arguments are scanned for the first file. */
void mawk_FINopen_main(mawk_state_t * MAWK)
{
	mawk_cell_t argc_copy; /* numeric copy of ARGC; ARGC itself is not cast */

	mawk_cellcpy(MAWK, &argc_copy, ARGC);
	if (argc_copy.type != C_NUM)
		mawk_cast1_to_num(MAWK, &argc_copy);

	if (argc_copy.d.dval != MAWK_NUM_ONE) {
		/* there are arguments: let next_main() pick the first file
		   (it falls back to stdin when none of them names one) */
		next_main(MAWK, 1);
		return;
	}

	/* no arguments at all: read stdin */
	set_main_to_stdin(MAWK);
}
/* Check whether s has the form of a command line assignment (name=value)
   and perform the assignment if so.
   s is temporarily cut at the '=' while the variable is looked up, then
   restored, so the argument string is unchanged on return.
   Escape sequences in the value are processed and the resulting cell is
   typed by mawk_check_strnum().
   Returns 1 if s had assignment syntax, 0 if it did not (s is then left
   untouched and nothing is assigned). */
int mawk_is_cmdline_assign(mawk_state_t *MAWK, char *s)
{
	char *p;
	int c;
	mawk_cell_t *cp;
	unsigned len;
	/* filled in by mawk_create_var(); when non-NULL the value is handed
	   over to mawk_field_assign() below */
	mawk_cell_t *fp = NULL;

	/* a name starts with an id char ... */
	if (MAWK->scan_code[*(unsigned char *) s] != SC_IDCHAR)
		return 0;

	/* ... goes on with id chars and digits ... */
	p = s + 1;
	while ((c = MAWK->scan_code[*(unsigned char *) p]) == SC_IDCHAR || c == SC_DIGIT)
		p++;

	/* ... and must be followed by '=' */
	if (*p != '=')
		return 0;

	*p = 0; /* terminate the name for the lookup */
	cp = mawk_create_var(MAWK, s, &fp);

	if (cp == NULL) {
		mawk_rt_error(MAWK, "cannot command line assign to %s\n\ttype clash or keyword", s);
		/* in case the error routine returns: put the '=' back and stop
		   here instead of writing through the NULL cell pointer */
		*p = '=';
		return 1;
	}

	/* we need to keep ARGV[i] intact */
	*p++ = '=';

	len = strlen(p) + 1;
	/* posix says escape sequences are on from command line */
	p = mawk_rm_escape(MAWK, strcpy((char *) mawk_zmalloc(MAWK, len), p));
	cp->ptr = (PTR) mawk_new_STRING(MAWK, p);
	mawk_zfree(MAWK, p, len);
	mawk_check_strnum(MAWK, cp); /* sets cp->type */

	if (fp) { /* move it from cell to pfield[] */
		mawk_field_assign(MAWK, fp, cp);
		free_STRING(string(cp));
	}
	return 1;
}
/* Locate the first record separator in the NUL terminated string str,
   according to the current MAWK->rs_shadow.
   at_end: non-zero if no more data can follow str; for regex/MLR
   separators a match that reaches the end of str is only accepted then,
   because more input could still extend the match.
   match_len: receives the length of the separator matched.
   Returns a pointer to the separator within str, or NULL if there is none
   (or if the separator type is not recognised). */
static char *find_sep(mawk_state_t *MAWK, int at_end, char *str, unsigned int *match_len)
{
	char *start = NULL;

	/* set up split rule (match pattern and set match length) */
	switch (MAWK->rs_shadow.type) {
		case SEP_CHAR: /* single char sep */
			start = strchr(str, MAWK->rs_shadow.c);
			*match_len = 1;
			break;

		case SEP_STR: /* static string sep */
			*match_len = ((mawk_string_t *) MAWK->rs_shadow.ptr)->len;
			start = mawk_str_str(str, ((mawk_string_t *) MAWK->rs_shadow.ptr)->str, *match_len);
			break;

		/* regex or MLR sep */
		case SEP_MLR:
		case SEP_RE:
			start = mawk_re_pos_match(MAWK, str, MAWK->rs_shadow.ptr, match_len);
			/* if the match is at the end, there might still be
			   more to match in the file */
			if (start && start[*match_len] == 0 && !at_end)
				start = NULL;
			break;

		default:
			mawk_bozo(MAWK, "type of rs_shadow");
			/* should mawk_bozo() return, report "no separator" with
			   defined values instead of uninitialised ones */
			start = NULL;
			*match_len = 0;
			break;
	}
	return start;
}
/* Return one input record, as determined by RS, from the input of file
   node fn.
   On success the result points to the NUL terminated record inside the
   input buffer and *len_p holds its length.
   Returns NULL on end of input, on a read error, or when MAWK->do_exit is
   set; returns (char *)mawk_FIN_nomore when mawk_fillbuff() reports that
   nothing more can be read right now. *len_p is not set in these cases.
   When the main input hits EOF, next_main() is used to move on to the
   next command line file. */
char *mawk_FINgets(mawk_state_t *MAWK, FILE_NODE *fn, unsigned *len_p)
{
	char *sep_at;
	unsigned match_len, available;
	long r;

	while(!MAWK->do_exit) { /* restart */
		mawk_input_t *fin = fn->fin;

		if (fin->flags & MAWK_INPF_NO_MORE_INPUTS)
			return NULL;

		/* at least try reading some more before giving up */
		fin->flags &= ~MAWK_INPF_NO_MORE;

		if ((fin->used == 0) && (fin->flags & MAWK_INPF_EOF)) {
			if (fin->flags & MAWK_INPF_MAIN) {
				fin = next_main(MAWK, 0);
				if (fin == NULL)
					return NULL;
				/* next_main() closed the old main input and installed a new
				   one; follow it so the restart does not read the closed node */
				fn = MAWK->main_input;
				continue; /* restart */
			}
			else {
				/* eof on a non-main file: no chance to get another file, report eof and exit */
				return NULL;
			}
		}

		/* have to retry finding a sep even if the buffer is the same partial
		   buffer we had last time: RS may have changed between the two calls!
		   The only exception is when buffer is empty for sure */
		if ((fin->used > 0) && (*fin->next != '\0')) {
			sep_at = find_sep(MAWK, (fin->flags & MAWK_INPF_EOF), fin->next, &match_len);

			/* found the separator pattern: cut the string there and return
			   the record from its beginning */
			if (sep_at != NULL) {
				char *start = fin->next;

				/* the easy and normal case: found a record */
				*sep_at = 0;
				*len_p = sep_at - start;
				fin->next = sep_at + match_len;
				if (fin->next - fin->buf >= fin->used) {
					/* the buffer got empty - update things to make it faster */
					fin->next = fin->buf;
					fin->used = 0;
				}
				return start;
			}

			/* no sep, but eof... */
			if (fin->flags & MAWK_INPF_EOF) {
				char *s;

				/* ...last line without a record terminator! Return it anyway */
				*len_p = r = strlen(fin->next);
				s = fin->next + r;
				/* in MLR mode a trailing newline is removed from the last
				   record; check the length before looking at s[-1] */
				if (MAWK->rs_shadow.type == SEP_MLR && r != 0 && s[-1] == '\n') {
					(*len_p)--;
					*--s = 0;
				}
				s = fin->next;
				fin->next = fin->buf;
				fin->used = 0;
				return s;
			}

			/* didn't find a separator and we are not at the end of the file */
			if (fin->next != fin->buf) {
				int new_len;

				/* we are deep into the buffer, the buffer ends with a partial record.
				   Move it to the beginning of the buffer */
				new_len = fin->used - (fin->next - fin->buf);
				if (new_len > 0)
					memmove(fin->buf, fin->next, new_len);
				fin->used = new_len;
				fin->next = fin->buf;
			}
			/* ... so try to read some more data */
		}

		available = fin->alloced - fin->used;
		if (available < BUFFSZ/2) {
			/* have to grow; next is kept as an offset because the buffer may move */
			int next_offs = fin->next - fin->buf;

			fin->buf = mawk_zrealloc(MAWK, fin->buf, fin->alloced, fin->alloced + BUFFSZ);
			fin->alloced += BUFFSZ;
			available += BUFFSZ;
			fin->next = fin->buf + next_offs;
		}

		/* one byte is held back for the terminating '\0' written below */
		r = mawk_fillbuff(MAWK, fin, fin->buf + fin->used, available-1, MAWK->interactive_flag);
		if (r == 0) {
			fin->flags |= MAWK_INPF_EOF;
			continue; /* may have a next file (???might be main) */
		}
		else if (r == mawk_FIN_nomore) {
			/* no more to read now and we had at most a partial record in buffer */
			return (char *)mawk_FIN_nomore;
		}
		else if (r < 0) {
			return NULL;
		}

		fin->used += r;
		fin->buf[fin->used] = '\0';

		if (fin->flags & MAWK_INPF_START) {
			if (MAWK->rs_shadow.type == SEP_MLR) {
				char *s;

				/* trim blank lines from front of file */
				TODO(": probably accept \r as well")
				for(s = fin->next; *s == '\n'; s++) ;
				if (*s == '\0') {
					/* emptied the buffer with all the \n's... so get back to initial state */
					fin->next = fin->buf;
					fin->used = 0;
					continue; /* restart */
				}
				/* found a non-'\n', use that as a potential start of the next record */
				fin->flags &= ~MAWK_INPF_START; /* we are not at the start anymore */
				fin->next = s;
				continue; /* restart: read on */
			}
			else
				fin->flags &= ~MAWK_INPF_START;
		}
	}

	/* get here if MAWK -> do_exit */
	return NULL;
}