/********************************************
init.c
libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
based on mawk code coming with the below copyright:
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the AWK programming language.
Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
#include "conf.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include "mawk.h"
#include "code.h"
#include "memory.h"
#include "symtype.h"
#include "init.h"
#include "scan.h"
#include "bi_vars.h"
#include "field.h"
#include "zmalloc.h"
#include "vio.h"
#include "version.h"
#include "da_bin_helper.h"
#include "cell.h"
#include "files.h"
/* Forward declarations of the file-local helpers defined further down. */
/* parses the command line; returns 1 to continue, 0 to stop */
static int process_cmdline(mawk_state_t *, int, char **);
/* fills ARGV[] and ARGC from argv, starting at the given index */
static void set_ARGV(mawk_state_t *, int, char **, int);
/* reports "not an option" and requests exit code 2 */
static void bad_option(mawk_state_t *, char *);
/* requests exit code 0 when there is no program to run */
static void no_program(mawk_state_t *);
/* defined outside this file; used below to validate -v arguments */
extern int mawk_is_cmdline_assign(mawk_state_t *, char *);
/* Default SET_PROGNAME(): stores the part of argv[0] after the last '/'
   (or all of argv[0] if it has no '/') in MAWK->progname. It can be
   overridden by defining SET_PROGNAME before this point. The expansion
   relies on variables named `argv` and `MAWK` being in scope at the point
   of use, and dereferences argv[0] without checking it. */
#ifndef SET_PROGNAME
#define SET_PROGNAME() \
{char *p = strrchr(argv[0],'/') ;\
MAWK->progname = p ? p+1 : argv[0] ; }
#endif
/* Template for the empty string; copied into MAWK->null_str by
   mawk_initialize_alloc(). NOTE(review): the first two members are presumably
   length (0) and reference count (1) - confirm against mawk_string_t. */
static const mawk_string_t null_str_ = { 0, 1, "" }; /* read-only */
/* Table pairing an escape letter with the character it stands for; copied
   into MAWK->escape_test by mawk_initialize_alloc(). The last entry is
   all-zero (presumably the end-of-table marker). */
static const mawk_escape_t escape_test_[ET_END + 1] = /* read-only */
{
{'n', '\n'},
{'t', '\t'},
{'f', '\f'},
{'b', '\b'},
{'r', '\r'},
{'a', '\07'}, /* octal 07: bell */
{'v', '\013'}, /* octal 013: vertical tab */
{'\\', '\\'},
{'\"', '\"'},
{0, 0}
};
/*
 * Allocate a zeroed interpreter state and fill in its defaults: ARGV and
 * loaded-script arrays, parser/stack bookkeeping, escape and scan tables,
 * default RS/FS shadows, sprintf buffer, builtin variables, builtin
 * functions, keywords (unless MAWK_NO_COMP) and fields.
 *
 * Returns the new state, or NULL if the state itself or its mpow2 table
 * can not be allocated. The caller releases it with mawk_uninitialize().
 */
mawk_state_t *mawk_initialize_alloc(void)
{
	/* powers of two 2^0 .. 2^23; a heap copy is stored in MAWK->mpow2 */
	unsigned long mpow2[NUM_CELL_TYPES] = {
		1LU<<0LU, 1LU<<1LU, 1LU<<2LU, 1LU<<3LU, 1LU<<4LU, 1LU<<5LU, 1LU<<6LU,
		1LU<<7LU, 1LU<<8LU, 1LU<<9LU, 1LU<<10LU, 1LU<<11LU, 1LU<<12LU,
		1LU<<13LU, 1LU<<14LU, 1LU<<15LU, 1LU<<16LU, 1LU<<17LU, 1LU<<18LU,
		1LU<<19LU, 1LU<<20LU, 1LU<<21LU, 1LU<<22LU, 1LU<<23LU
	};
	SEPARATOR rs_ = { SEP_CHAR, '\n' };
	mawk_cell_t fs_ = { C_SPACE };
	mawk_state_t *MAWK;

	MAWK = calloc(1, sizeof(mawk_state_t));
	if (MAWK == NULL)
		return NULL;

	/* Do the other plain malloc() first: while nothing else hangs off the
	   state yet, a failure can be undone with a single free(). */
	MAWK->mpow2 = malloc(sizeof(mpow2));
	if (MAWK->mpow2 == NULL) {
		free(MAWK);
		return NULL;
	}
	memcpy(MAWK->mpow2, mpow2, sizeof(mpow2));

	MAWK->Argv = mawk_array_new(MAWK, NULL);
	MAWK->scripts_loaded = mawk_array_new(MAWK, NULL);
	MAWK->last_token_lineno = -1;
	MAWK->null_str = null_str_;
	MAWK->argi = MAWK_NUM_ONE;
	MAWK->current_token = -1;
	MAWK->execution_start = 0;
	MAWK->ps.code_move_level = 0;
	MAWK->stack_base = MAWK->eval_stack; /* these can move for deep recursion */
	MAWK->stack_danger = MAWK->eval_stack + DANGER;
	memcpy(MAWK->escape_test, escape_test_, sizeof(escape_test_));
	MAWK->interactive_flag = 0;
	MAWK->shell = mawk_strdup_("/bin/sh");
	MAWK->max_field = MAX_SPLIT;
	MAWK->rs_shadow = rs_;
	MAWK->fs_shadow = fs_;
	memcpy(MAWK->scan_code, mawk_scan_code, sizeof(mawk_scan_code));

	/* this can be moved and enlarged by -W sprintf=num */
	MAWK->sprintf_buff = string_buff;
	MAWK->sprintf_limit = string_buff + sizeof(MAWK->tempbuff);

	MAWK->fbank[0] = MAWK->field;

	mawk_bi_vars_init(MAWK); /* load the builtin variables */
	mawk_bi_funct_init(MAWK); /* load the builtin functions */
#ifndef MAWK_NO_COMP
	mawk_kw_init(MAWK); /* load the keywords */
#endif
	mawk_field_init(MAWK);
	mawk_fpe_init();
	return MAWK;
}
/*
 * Second initialization stage: record the program name taken from argv[0]
 * and run the command line parser on an already allocated state.
 * Returns MAWK when process_cmdline() reports success, NULL otherwise.
 */
mawk_state_t *mawk_initialize_argv(mawk_state_t *MAWK, int argc, char **argv)
{
	/* the macro reads `argv` and writes MAWK->progname */
	SET_PROGNAME();

	return process_cmdline(MAWK, argc, argv) ? MAWK : NULL;
}
/*
 * One-call setup: allocate a state, attach the vio initializer, process
 * the command line and, if that succeeded, initialize the code module.
 * Returns the ready state, or NULL if allocation or command line
 * processing failed.
 */
mawk_state_t *mawk_initialize(int argc, char **argv, mawk_vio_init_t vio_init)
{
	mawk_state_t *state, *ready;

	state = mawk_initialize_alloc();
	if (state == NULL)
		return NULL;

	state->vio_init = vio_init;

	ready = mawk_initialize_argv(state, argc, argv);
	if (ready != NULL)
		mawk_code_init(ready);

	/* TODO: when ready is NULL, 'state' is not freed here */
	return ready;
}
void mawk_hash_clear(mawk_state_t *MAWK);

/*
 * Tear down an interpreter state and release the state structure itself.
 * The extra per-module cleanup (keywords, builtin functions/variables,
 * fields, code blocks, function dump list, symbol hash) is only compiled
 * in with MAWK_MEM_PEDANTIC. `m` must not be used after this call.
 */
void mawk_uninitialize(mawk_state_t * m)
{
	FBLOCK *cfunc, *cfunc_next;

#ifdef MAWK_MEM_PEDANTIC
#ifndef MAWK_NO_COMP
	mawk_kw_uninit(m);
#endif
	mawk_bi_funct_uninit(m);
#endif

	/* free data of c calls compiled into the script */
	cfunc = m->c_funcs;
	while (cfunc != NULL) {
		cfunc_next = cfunc->c_next;
		free((char *)cfunc->name);
		free(cfunc);
		cfunc = cfunc_next;
	}

	/* free() accepts NULL, so these need no guard */
	free(m->shell);
	free(m->mpow2);

#ifdef MAWK_MEM_PEDANTIC
	mawk_bi_vars_uninit(m);
	mawk_field_uninit(m);
#endif

	free(m->mawk_parser_stack);

	if (m->ps.buffer != NULL)
		mawk_zfree(m, m->ps.buffer, BUFFSZ + 1);

	if (m->scripts_loaded != NULL)
		mawk_array_destroy(m, m->scripts_loaded);

#ifdef MAWK_MEM_PEDANTIC
	if (m->begin_start != NULL)
		mawk_zfree(m, m->begin_start, m->begin_size);
	if (m->main_start != NULL)
		mawk_zfree(m, m->main_start, m->main_size);
	if (m->end_start_orig != NULL)
		mawk_zfree(m, m->end_start_orig, m->end_size);

	{
		struct mawk_fdump *entry, *entry_next;

		entry = m->fdump_list;
		while (entry != NULL) {
			entry_next = entry->link;
			mawk_delete(m, entry->fbp->name, 1);
			MAWK_ZFREE(m, entry);
			entry = entry_next;
		}
	}

	/* free global variables */
	mawk_hash_clear(m);
#endif

	/* close and free all files */
	mawk_file_uninit(m);

	mawk_free_all(m);
	free(m);
}
/* Report `s` as an unrecognized command line option and request exit
   with code 2 through mawk_exit().
   NOTE(review): the callers in this file are written as if this call may
   return (mawk_exit() presumably only flags the exit) - confirm. */
static void bad_option(mawk_state_t *MAWK, char *s)
{
mawk_errmsg(MAWK, 0, "not an option: %s", s);
mawk_exit(MAWK, 2);
}
/* Called when the command line supplies no program text or file: requests
   exit with code 0 through mawk_exit(), without printing anything. */
static void no_program(mawk_state_t * MAWK)
{
mawk_exit(MAWK, 0);
}
/*
 * Register a script file to be loaded.
 *   fn       - file name; the pointer is stored, the string is not copied
 *   bytecode - flag stored next to the name (process_cmdline passes 1 for
 *              -b files and 0 for -f files)
 *
 * The first file goes in MAWK->ps.pfile_name; any more are appended, in
 * call order, to the MAWK->pfile_list / MAWK->pfile_list_tail list.
 */
void mawk_append_input_file(mawk_state_t * MAWK, const char *fn, int bytecode)
{
	PFILE *node;

	if (!MAWK->ps.pfile_name) {
		MAWK->ps.pfile_name = fn;
		MAWK->ps.pfile_bytecode = bytecode;
		return;
	}

	/* Every list node records the file it was created for; this applies to
	   the third and later files as well, not only to the first list entry. */
	node = MAWK_ZMALLOC(MAWK, PFILE);
	node->fname = fn;
	node->bytecode = bytecode;
	node->link = NULL;

	if (MAWK->pfile_list_tail == NULL)
		MAWK->pfile_list = node; /* list was empty: node is the head too */
	else
		MAWK->pfile_list_tail->link = node;
	MAWK->pfile_list_tail = node;
}
#ifdef MAWK_NO_COMP
/* Load one binary script from `path` with mawk_load_code_bin().
   Intended to return 1 on success and 0 on failure; on failure an error
   message is printed and exit code 2 is requested.
   NOTE(review): if mawk_exitval() itself returns its third argument, the
   failure value seen by callers is -1, not 0 - confirm. */
static int mawk_load_bin(mawk_state_t *MAWK, const char *path)
{
	if (mawk_load_code_bin(MAWK, path) == 0)
		return 1;

	mawk_errmsg(MAWK, 0, "failed to load or link binary script %s", path);
	mawk_exitval(MAWK, 2, -1);
	return 0;
}
/* Load every registered script as binary: first MAWK->ps.pfile_name (when
   set), then each entry of MAWK->pfile_list in list order. Stops at the
   first script mawk_load_bin() reports as failed.
   Returns 1 if all loaded, 0 otherwise. */
static int mawk_load_bins(mawk_state_t *MAWK)
{
	PFILE *entry;

	if (MAWK->ps.pfile_name != NULL && !mawk_load_bin(MAWK, MAWK->ps.pfile_name))
		return 0;

	entry = MAWK->pfile_list;
	while (entry != NULL) {
		if (!mawk_load_bin(MAWK, entry->fname))
			return 0;
		entry = entry->link;
	}

	return 1;
}
#endif
/* Case-insensitive string comparison (not a full strcasecmp replacement,
   just enough for our needs: case folding is whatever tolower() does).
   Returns 0 if the strings are equal ignoring case; otherwise the
   difference of the first pair of lower-cased characters that differ:
   negative if s1 sorts before s2, positive if after. */
static int mawk_strcasecmp(const char *s1, const char *s2)
{
	for(;;) {
		/* tolower() is only defined for unsigned char values and EOF */
		int c1 = tolower((unsigned char)*s1);
		int c2 = tolower((unsigned char)*s2);

		if (c1 != c2)
			return c1 - c2;
		if (*s1 == '\0')
			return 0;
		s1++;
		s2++;
	}
}
/*
 * Parse the command line and prepare the scanner (or, with MAWK_NO_COMP,
 * load the binary scripts).
 *
 * Options are read from argv[1] on, up to the first argument that does not
 * start with '-', a lone "-", "--", or "-W e...":
 *   -W opt     selected by the first letter of opt, case-insensitive:
 *              V (print version), C, D (dump / dumpsym / debug),
 *              S (sprintf=size), P (posix space), E (exec file),
 *              I (interactive), Maxmem=n[K|M]
 *   -v var=val command line assignment
 *   -F sep     field separator (escape sequences are processed)
 *   -f file    script source file (not available with MAWK_NO_COMP)
 *   -b file    script bytecode file
 *   --version, --help   print a short notice and exit(0)
 * An option argument may be glued to the option or be the next argument.
 * Without -f/-b/-W exec the first non-option argument is the program text.
 * The remaining arguments are stored in ARGV[] by set_ARGV().
 *
 * Returns 1 when initialization may go on, 0 when it has to stop (an error
 * was reported or an exit was requested).
 */
static int process_cmdline(mawk_state_t *MAWK, int argc, char **argv)
{
	int i, nextarg;
	char *optarg;

	MAWK->pfile_list = NULL;
	MAWK->pfile_list_tail = NULL;

	for (i = 1; i < argc && argv[i][0] == '-'; i = nextarg) {
		if (argv[i][1] == 0) { /* - alone */
			if (!MAWK->ps.pfile_name) {
				no_program(MAWK);
				return 0;
			}
			break; /* the for loop */
		}

		/* safe to look at argv[i][2] */
		if (argv[i][2] == 0) {
			if (i == argc - 1 && argv[i][1] != '-') {
				/* last argument is a bare one-letter option */
				if (strchr("WFvfb", argv[i][1])) {
					mawk_errmsg(MAWK, 0, "option %s lacks argument", argv[i]);
					mawk_exit_(MAWK, 2);
					return 0;
				}
				bad_option(MAWK, argv[i]);
				return 0; /* do not go on parsing if bad_option() returns */
			}
			/* only "--" can get here as last argument; do not read past argv */
			optarg = (i + 1 < argc) ? argv[i + 1] : NULL;
			nextarg = i + 2;
		}
		else { /* argument glued to option */
			optarg = &argv[i][2];
			nextarg = i + 1;
		}

		switch (argv[i][1]) {
		case 'W':
			/* -W options are selected by their upper-cased first letter */
			if (optarg[0] >= 'a' && optarg[0] <= 'z')
				optarg[0] += 'A' - 'a';
			if (optarg[0] == 'V')
				mawk_print_version(MAWK);
			else if (optarg[0] == 'C')
				MAWK->dump_code_flag = 2;
			else if (optarg[0] == 'D') {
				if (mawk_strcasecmp(optarg, "DUMP") == 0)
					MAWK->dump_code_flag = 1;
				else if (mawk_strcasecmp(optarg, "DUMPSYM") == 0)
					MAWK->dump_sym_flag = 1;
				else if (mawk_strcasecmp(optarg, "DEBUG") == 0)
					MAWK->debug_symbols = 1;
			}
			else if (optarg[0] == 'S') {
				char *p = strchr(optarg, '=');
				int x = p ? atoi(p + 1) : 0;

				/* replace the sprintf buffer only if a larger one is asked for */
				if (x > SPRINTF_SZ) {
					MAWK->sprintf_buff = (char *) mawk_zmalloc(MAWK, x);
					MAWK->sprintf_limit = MAWK->sprintf_buff + x;
				}
			}
			else if (optarg[0] == 'P') {
				MAWK->posix_space_flag = 1;
			}
			else if (optarg[0] == 'E') {
				if (MAWK->ps.pfile_name) {
					mawk_errmsg(MAWK, 0, "-W exec is incompatible with -f");
					mawk_exit_(MAWK, 2);
					return 0;
				}
				else if (nextarg == argc) {
					/* no script name follows: argv[nextarg] must not be used */
					no_program(MAWK);
					return 0;
				}
				MAWK->ps.pfile_name = argv[nextarg];
				MAWK->ps.pfile_bytecode = 0;
				i = nextarg + 1;
				goto no_more_opts;
			}
			else if (optarg[0] == 'I') {
				MAWK->interactive_flag = 1;
			}
			else if (strncmp(optarg, "Maxmem=", 7) == 0) {
				char *end;

				MAWK->mm_max = strtol(optarg+7, &end, 10);
				switch(*end) {
				case '\0':
					break;
				case 'k':
				case 'K':
					MAWK->mm_max *= 1024;
					break;
				case 'm':
				case 'M':
					MAWK->mm_max *= 1024 * 1024;
					break;
				default:
					MAWK->mm_max = 0;
					mawk_errmsg(MAWK, 0, "invalid memory size for -Wmaxmem (must be integer with optional K or M suffix): '%s'", optarg+7);
				}
			}
			else
				mawk_errmsg(MAWK, 0, "vacuous option: -W %s", optarg);
			break;

		case 'v':
#ifdef MAWK_NO_EXEC
			mawk_errmsg(MAWK, 0, "Compiler-only version of mawk can not set runtime variables with -v");
			mawk_exit_(MAWK, 2);
			return 0;
#else
			if (!mawk_is_cmdline_assign(MAWK, optarg)) {
				mawk_errmsg(MAWK, 0, "improper assignment: -v %s", optarg);
				mawk_exit_(MAWK, 2);
				return 0;
			}
#endif
			break;

		case 'F':
			mawk_rm_escape(MAWK, optarg); /* recognize escape sequences */
			mawk_cell_destroy(MAWK, MAWK_FS);
			MAWK_FS->type = C_STRING;
			MAWK_FS->ptr = (PTR) mawk_new_STRING(MAWK, optarg);
			mawk_cellcpy(MAWK, &MAWK->fs_shadow, MAWK_FS);
			mawk_cast_for_split(MAWK, &MAWK->fs_shadow);
			break;

		case '-':
			if (argv[i][2] != 0) {
				if ((strcmp(argv[i], "--version") == 0) || (strcmp(argv[i], "--help") == 0)) {
					printf("\nlmawk is libmawk " LMAWK_VER "\nFor more info, see the manual page for lmawk(1) and\nhttp://repo.hu/projects/libmawk.\n\n");
					exit(0);
				}
				bad_option(MAWK, argv[i]);
				return 0;
			}
			/* plain "--": end of options */
			i++;
			goto no_more_opts;

#ifndef MAWK_NO_COMP
		case 'f':
			mawk_append_input_file(MAWK, optarg, 0);
			break;
#endif

		case 'b':
			mawk_append_input_file(MAWK, optarg, 1);
			break;

		default:
			bad_option(MAWK, argv[i]);
			return 0;
		}
	}

no_more_opts:

	if (MAWK->ps.pfile_name) { /* program from -f or -b */
		set_ARGV(MAWK, argc, argv, i);
#ifdef MAWK_NO_COMP
		mawk_load_bins(MAWK);
#else
		mawk_scan_init(MAWK, (char *) 0);
#endif
		if (MAWK->do_exit)
			return 0;
	}
	else { /* program given on command line (no -f or -b) */
		if (i == argc) {
			if (!MAWK->no_program_ok) {
				no_program(MAWK);
				return 0;
			}
		}
		/* argv[i] is the program text, so ARGV[1] starts at argv[i+1] */
		set_ARGV(MAWK, argc, argv, i + 1);

#ifndef MAWK_NO_COMP
		if (i != argc)
			mawk_scan_init(MAWK, argv[i]);
		else
			mawk_scan_init(MAWK, "BEGIN {}");
#else
		/* NOTE(review): kept as found - returns 1 right after bad_option()
		   and may pass argv[argc] when no_program_ok is set; confirm intent */
		bad_option(MAWK, argv[i]);
		return 1;
#endif

		if (MAWK->do_exit)
			return 0;
	}
	return 1;
}
/*
 * Publish the command line to the script:
 *   ARGV[0]    = MAWK->progname (as C_STRING)
 *   ARGV[1..]  = argv[i], argv[i+1], ... argv[argc-1]
 *   ARGC       = 1 + the number of arguments stored that way
 * The "ARGV" symbol is bound to the array in MAWK->Argv.
 */
static void set_ARGV(mawk_state_t *MAWK, int argc, char **argv, int i)
{
	SYMTAB *argv_sym;
	mawk_cell_t key, val;

	argv_sym = mawk_insert(MAWK, "ARGV");
	argv_sym->type = ST_ARRAY;
	argv_sym->stval.array = MAWK->Argv;

	/* store program name in ARGV[0] */
	key.type = C_NUM;
	key.d.dval = MAWK_NUM_ZERO;
	val.type = C_STRING;
	val.ptr = (PTR) mawk_new_STRING(MAWK, MAWK->progname);
	mawk_array_set(MAWK, argv_sym->stval.array, &key, &val);
	free_STRING((mawk_string_t *)val.ptr);

	/* The rest is stored as C_MBSTRN, because the user might enter
	   numbers from the command line */
	key.d.dval = MAWK_NUM_ONE;
	while (i < argc) {
		val.type = C_MBSTRN;
		val.ptr = (PTR) mawk_new_STRING(MAWK, argv[i]);
		mawk_array_set(MAWK, argv_sym->stval.array, &key, &val);
		free_STRING((mawk_string_t *)val.ptr);

		i++;
		key.d.dval += MAWK_NUM_ONE;
	}

	/* key now holds the index following the last one stored */
	ARGC->type = C_NUM;
	ARGC->d.dval = key.d.dval;
}