/********************************************
mawk.h
libmawk changes (C) 2009-2012, Tibor 'Igor2' Palinkas;
based on mawk code coming with the below copyright:
copyright 1991-94, Michael D. Brennan
This is a source file for mawk, an implementation of
the AWK programming language.
Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
#ifndef MAWK_H
#define MAWK_H
#include <libmawk/conf.h>
#include <stdlib.h>
#include <libmawk/nstd.h>
/* forward declaration: only the name mawk_vio_t is introduced here, the body
   of struct mawk_vio_s is not part of this header */
typedef struct mawk_vio_s mawk_vio_t;
/* libmawk error codes: success is 0, every error is a distinct negative
   value. The identifiers (including the historical spelling of
   MAWK_ESUCCES) are part of the interface and are kept verbatim. */
typedef enum mawk_errno_e {
	MAWK_ESUCCES    =  0,
	MAWK_ECANTOPEN  = -1,
	MAWK_EHDRSIZE   = -2,
	MAWK_EFILEMAGIC = -3,
	MAWK_EBYTEORDER = -4,
	MAWK_EVERSION   = -5,
	MAWK_EINSTSIZE  = -6,
	MAWK_ENUMSIZE   = -7,
	MAWK_EALLOC     = -8,
	MAWK_EWRONGVAL  = -9,
	/* end marker: same value as the last (most negative) real code */
	MAWK_Elast      = MAWK_EWRONGVAL
} mawk_errno_t;
/* maps a mawk_errno_t to a constant string; presumably a human readable
   description of err - TODO confirm against the implementation */
const char *mawk_strerror(mawk_errno_t err);
/* forward declarations of types whose bodies are defined later/elsewhere */
typedef struct mawk_state_s mawk_state_t;
typedef struct fcall FCALL_REC;
#include <libmawk/types.h>
#include <libmawk/bi_vars.h>
#include <libmawk/vio.h>
/* number of extra cells appended after the FBANK_SZ regular cells of the
   field[] array in the state */
#define NUM_PFIELDS  5
/* size in bytes of the tempbuff scratch area; requires a mawk_state_t
   pointer named MAWK to be in scope at the point of use */
#define SPRINTF_SZ   (sizeof(MAWK->tempbuff))
/* One program file to be processed; elements are chained into a singly
   linked list through ->link. */
typedef struct pfile {
	struct pfile *link;   /* next element of the list */
	const char *fname;    /* name of the file */
	char bytecode;        /* 1 if file is expected to be bytecode */
} PFILE;
/* handle of a buffered input (FIN); the struct body is declared elsewhere */
typedef struct mawk_fin_s mawk_input_t;
/* NOTE: mawk_array_t is itself a pointer type (struct array *) */
typedef struct array *mawk_array_t;
/* Array implementation callbacks; the default implementation is in
   array_orig.c. */

/* find: look up the index idx in arr; returns 1 if the index exists.
    - If result is non-NULL it is destroyed first, whether or not the index
      exists; if the index exists, the member's value is then copied into
      result. The caller has to destroy result after the call. Modifying
      result does not affect the array.
    - idx and result may be the same pointer: result is guaranteed to be
      destroyed only after the indexing took place.
    - If create is 1, a non-existing member is created with empty value. */
typedef int mawk_array_find_t(
	mawk_state_t *MAWK,
	mawk_array_t arr,
	const mawk_cell_t *idx,
	mawk_cell_t *result,
	int create);

/* set: store val as the member of arr at index idx */
typedef void mawk_array_set_t(
	mawk_state_t *MAWK,
	mawk_array_t arr,
	const mawk_cell_t *idx,
	mawk_cell_t *val);

/* delete: remove the single index idx from arr; this is what the awk
   statement "delete A[i]" ends up calling */
typedef void mawk_array_delete_t(
	mawk_state_t *MAWK,
	mawk_array_t arr,
	const mawk_cell_t *idx);

/* clear: free every element; afterwards arr should be a valid, empty array */
typedef void mawk_array_clear_t(
	mawk_state_t *MAWK,
	mawk_array_t arr);

/* loop vector: list and return all indices as strings; used by
   "for(a in ARR)". The returned pointer must come from mawk_malloc(), or be
   NULL if the array is empty. (vsize presumably receives the number of
   entries - TODO confirm.) */
typedef mawk_string_t **mawk_array_loop_vector_t(
	mawk_state_t *MAWK,
	mawk_array_t arr,
	unsigned *vsize);

/* load: fill arr from MAWK->split_ov_list; called exclusively from split() */
typedef void mawk_array_load_t(
	mawk_state_t *MAWK,
	mawk_array_t arr,
	int cnt);
/* Optional iterator callbacks, used by the _generic code. The array is not
   changed while an iteration is in progress, with one exception: clear()
   deletes the element that was returned last. */

/* begin iterating over all members of arr; returns the iterator */
typedef void *mawk_array_it_start_t(
	mawk_state_t *MAWK,
	mawk_array_t arr);

/* return the index of the next member, or NULL at the end */
typedef const mawk_cell_t *mawk_array_it_next_t(
	mawk_state_t *MAWK,
	mawk_array_t arr,
	void *iterator);

/* called after the last element, or when the iteration is to be stopped */
typedef void mawk_array_it_stop_t(
	mawk_state_t *MAWK,
	mawk_array_t arr,
	void *iterator);
/* A whole array implementation consists of all of the callbacks above,
   collected in one table of function pointers. */
typedef struct array_imp_s {
	/* mandatory for any array implementation: */
	mawk_array_find_t   *find;
	mawk_array_set_t    *set;
	mawk_array_delete_t *delet;

	/* option A: the implementation provides these manually: */
	mawk_array_clear_t       *clear;
	mawk_array_loop_vector_t *loop_vect;
	mawk_array_load_t        *load;

	/* option B: rely on array_generic and provide an iterator instead: */
	mawk_array_it_start_t *it_start;
	mawk_array_it_next_t  *it_next;
	mawk_array_it_stop_t  *it_stop;
} array_imp_t;
/* An actual array instance (mawk_array_t points to one of these). */
struct array {
	array_imp_t imp;   /* implementation (callback table, stored by value) */
	PTR ptr;           /* what this points to depends on the type and implementation */
	unsigned size;     /* number of elts in the table */
	unsigned limit;    /* meaning depends on type and implementation */
	unsigned hmask;    /* bitwise and with hash value to get table index */
	short type;        /* values in AY_NULL .. AY_SPLIT */

	/* state for custom (non-orig) implementations; 0/NULL by default */
	union {
		int i;
		void *p;
	} state;
};
/* Four pointers into a run of INST; going by the names these are the start,
   the end, a warning threshold and the current position - TODO confirm
   against the code generator. */
typedef struct {
	INST *base;
	INST *limit;
	INST *warn;
	INST *ptr;
} CODEBLOCK;
/*------------------------
  user defined functions
  ------------------------*/

/* function block */
typedef struct fblock FBLOCK;
struct fblock {
	const char *name;
	INST *code;
	unsigned size;          /* allocated size for proper cleanup with zfree() */
	unsigned short nargs;
	char *typev;            /* array of size nargs holding types */

	/* linked list of all c function call blocks compiled into the code so
	   they can be free'd at the end; see also: MAWK->c_calls */
	FBLOCK *c_next;
};
/* Element of a linked list of jump records (the state keeps the head in
   jmp_top). */
typedef struct jmp {
	struct jmp *link;    /* next record */
	int source_offset;
} JMP;
/* Element of the BC stack, kept as a linked list (the state keeps the top
   in bc_top). */
typedef struct bc {
	struct bc *link;     /* stack as linked list */
	int type;            /* 'B' or 'C' or mark start with 0 */
	int source_offset;   /* position of _JMP */
} BC;
/* mc -- move code: a stack (linked through ->link) that holds some pieces
   of code while loops are being reorganized. */
typedef struct mc {
	struct mc *link;
	INST *code;        /* the saved code */
	unsigned len;      /* its length */
	int scope;         /* its scope */
	int move_level;    /* size of this stack when coded */
	FBLOCK *fbp;       /* if scope FUNCT */
	int offset;        /* distance from its code base */
} MC;
/* Record of one child process: its pid and its exit status, chained into a
   list through ->link. */
struct child {
	int pid;
	int exit_status;
	struct child *link;   /* next record */
};
/* DANGER is the eval stack size minus a margin of SAFETY entries;
   EVAL_STACK_SIZE has to be defined wherever DANGER gets expanded */
#define SAFETY   16
#define DANGER   (EVAL_STACK_SIZE - SAFETY)
/* the escape_test[] table of the state has ET_END + 1 entries */
#define ET_END   9
/* A pair of characters: one "in" and one "out" (entries of the escape_test
   table in the state). */
typedef struct {
	char in;
	char out;
} mawk_escape_t;
/* List element referencing one function block; the state's fdump_list is a
   linked list of these. */
struct mawk_fdump {
	struct mawk_fdump *link;   /* next element */
	FBLOCK *fbp;               /* the function block */
};
/* Dynamically created files are stored on a linked linear list with move
   to the front (big surprise). */
typedef struct file_node_s {
	struct file_node_s *link;
	mawk_string_t *name;
	short type;

	/* direct file IO for output or direct input ... */
	mawk_vio_t *vf;

	/* ... or buffered (FIN) */
	mawk_input_t *fin;
} FILE_NODE;
/* Description of a separator (the state uses one as rs_shadow). */
typedef struct {
	char type;
	char c;
	PTR ptr;   /* mawk_string_t* or RE machine* */
} SEPARATOR;
/* Info about one builtin. */
typedef struct {
	char *name;
	PF_CP fp;                 /* ptr to function that does the builtin */

	/* info for parser to check correct number of arguments */
	unsigned char min_args;
	unsigned char max_args;
} BI_REC;
/* A symbol table entry: a name, a type tag and a type dependent value.
   (Which member of stval is valid presumably depends on ->type - TODO
   confirm against the symbol table code.) */
typedef struct {
	const char *name;
	char type;
	unsigned char offset;   /* offset in stack frame for local vars */

	union {
		mawk_cell_t *cp;
		int kw;
		PF_CP fp;
		const BI_REC *bip;
		mawk_array_t array;
		FBLOCK *fbp;

		/* a C function together with its user data */
		struct {
			mawk_cell_t *(*callback)(mawk_state_t *context, mawk_cell_t *sp, int a_args);
			void *func_userdata;
		} c_function;
	} stval;
} SYMTAB;
/* A node on a hash chain: the chain link plus the symbol table entry, which
   is embedded by value. */
typedef struct hash {
	struct hash *link;   /* next node */
	SYMTAB symtab;
} HASHNODE;
#define POOLSZ    16   /* number of entries of the pool[] array in the state */
#define ZBLOCKSZ  8    /* size of one ZBLOCK unit in bytes */
#define ZSHIFT    3    /* 1 << ZSHIFT == ZBLOCKSZ */

/* One allocation unit: either ZBLOCKSZ raw bytes or the link to another
   ZBLOCK. */
typedef union zblock {
	char dummy[ZBLOCKSZ];
	union zblock *link;
} ZBLOCK;

/* ZBLOCKS of sizes 1, 2, ... 16 units,
   that is 8, 16, ... 128 bytes,
   are stored on the linked linear lists in
   pool[0], pool[1], ... pool[15]
*/
/* List element pairing a string with a PTR named re (the state's re_list
   is described as a list of compiled regular expressions). */
typedef struct re_node {
	mawk_string_t *sval;
	PTR re;
	struct re_node *link;   /* next element */
} RE_NODE;
/* List element mapping an input string to an output cell (see repl_list in
   the state). */
typedef struct repl_node {
	struct repl_node *link;   /* next element */
	mawk_string_t *sval;      /* the input */
	mawk_cell_t *cp;          /* the output */
} REPL_NODE;
/* List element holding one string (see split_ov_list in the state). */
typedef struct spov {
	struct spov *link;     /* next element */
	mawk_string_t *sval;
} SPLIT_OV;
/* ---------------------------------------------------------------------- */

/* One entry of the debug call stack: a function block and the link to the
   next entry. */
typedef struct mawk_debug_callstack_s mawk_debug_callstack_t;
struct mawk_debug_callstack_s {
	FBLOCK *f;
	mawk_debug_callstack_t *next;
};
/* State of parsing one program input; the interpreter state holds the
   current one by value and a stack of them for "include". */
typedef struct mawk_parse_state_s {
	const char *pfile_name;   /* program input file */
	char pfile_bytecode;      /* 1 if program input file is expected to be bytecode */

	/* used as part of unique identification of context when moving code.
	   Global for communication with parser. */
	int code_move_level;

	mawk_string_t *program_string;

	/* unsigned so it works with 8 bit chars */
	unsigned char *buffer;
	unsigned char *buffp;

	FILE_NODE *program_fin;
	int eof_flag;
} mawk_parse_state_t;
/* Header of a block on a doubly linked list (prev/next); data[] marks where
   the actual data starts. */
typedef struct mawk_mm_s mawk_mm_t;
struct mawk_mm_s {
	mawk_mm_t *prev;
	mawk_mm_t *next;
	int size;
	char data[1];   /* actual data */
};
/* regex lib */

/* bit vector: 32 bytes */
typedef unsigned char mawk_BV[32];

/* One state of a regex machine: a type tag, a length and type dependent
   data. */
typedef struct {
	char type;
	unsigned char len;   /* used for M_STR */
	union {
		char *str;       /* string */
		mawk_BV *bvp;    /* class */
		int jump;
	} data;
} mawk_RESTATE;
/* function callback type: this is how execute() can call external C functions;
   the signature is the same as that of the callback member in SYMTAB's
   c_function */
typedef mawk_cell_t *libmawk_c_function(mawk_state_t *context, mawk_cell_t * sp, int a_args);
/* run time state: one entry of the regex run time stack */
typedef struct {
	mawk_RESTATE *m;   /* save the machine ptr */
	int u;             /* save the u_flag */
	char *s;           /* save the active string ptr */
	char *ss;          /* save the match start -- only used by mawk_REmatch */
} mawk_RT_STATE;
/* The state behind mawk_state_t: scratch buffers, fields, parser and scanner
   bookkeeping, code blocks, the evaluation stack, allocator pools, regex
   library state, option flags and hooks, all in one flat struct.
   NOTE: members are compiled in or out depending on DEBUG and
   HAVE_STRTOD_OVF_BUG, so the layout differs between builds that disagree on
   those macros. */
struct mawk_state_s {
#ifdef DEBUG
#define YYDEBUG 1
int yydebug; /* print parse if on */
int dump_RE;
#endif
short posix_space_flag, interactive_flag;
/* a well known string */
mawk_string_t null_str;
/* a useful scratch area (accessed through the string_buff and split_buff macros) */
union {
mawk_string_t *_split_buff[MAX_SPLIT];
char _string_buff[MIN_SPRINTF];
} tempbuff;
/* help with casts */
unsigned long *mpow2;
mawk_cell_t field[FBANK_SZ + NUM_PFIELDS];
/* $0, $1 ... $(MAX_SPLIT), NF, RS, FS, CONVFMT, OFMT
   (this list used to name RS twice; presumably FS was meant - TODO confirm) */
/* more fields if needed go here */
mawk_cell_t *fbank[NUM_FBANK]; /* fbank[0] == field */
/* these are used by the parser, scanner and error messages
from the compile */
PFILE *pfile_list, *pfile_list_tail;
int current_token;
unsigned token_lineno; /* lineno of current token */
unsigned compile_error_count;
int paren_cnt, brace_cnt;
int print_flag, getline_flag;
short mawk_state;
char *progname; /* for error messages */
unsigned rt_nr, rt_fnr; /* also for error messages */
/* this can be moved and enlarged by -W sprintf=num */
char *sprintf_buff;
char *sprintf_limit;
FILE_NODE *main_input;
mawk_array_t Argv; /* to the user this is ARGV */
mawk_num_t argi; /* index of next ARGV[argi] to try to open */
unsigned lineno;
int NR_flag; /* are we tracking NR */
CODEBLOCK active_code;
CODEBLOCK *main_code_p, *begin_code_p, *end_code_p;
INST *begin_start, *main_start, *end_start, *end_start_orig;
unsigned begin_size, main_size, end_size;
INST *execution_start;
int dump_code_flag; /* if on dump internal code */
int dump_sym_flag; /* if on dump internal symbols */
INST *restart_label; /* control flow labels */
INST *next_label;
mawk_cell_t tc; /* useful temp */
int scope;
FBLOCK *active_funct; /* when scope is SCOPE_FUNCT */
/* tops of the JMP, BC and MC linked-list stacks */
JMP *jmp_top;
BC *bc_top;
MC *mc_top;
mawk_parse_state_t ps; /* current parse state */
mawk_array_t scripts_loaded; /* a hash indexed by full paths of scripts already loaded */
mawk_parse_state_t *mawk_parser_stack; /* parse state stack for "include" */
int pstack_alloced, pstack_used;
int check_progress; /* flag that indicates call_arg_check() was able to type check some call arguments */
struct child *child_list; /* dead children are kept on this list */
unsigned repl_cnt; /* number of global replacements */
long seed; /* must be >=1 and < 2^31-1 */
mawk_cell_t cseed; /* argument of last call to srand() */
mawk_cell_t eval_stack[EVAL_STACK_SIZE];
mawk_cell_t *sp;
mawk_cell_t *stack_base; /* these can move for deep recursion */
mawk_cell_t *stack_danger;
int exit_code, final_exit_code, rt_exit_code;
# ifdef HAVE_STRTOD_OVF_BUG
fp_except entry_mask;
fp_except working_mask;
# endif
mawk_escape_t escape_test[ET_END + 1];
struct mawk_fdump *fdump_list; /* linked list of all user functions */
FILE_NODE *file_list;
char *shell; /* hardwire to /bin/sh for portability of programs */
int max_field; /* maximum field actually created */
/* a description of how to split based on RS.
If RS is changed, so is rs_shadow */
SEPARATOR rs_shadow;
/* a splitting mawk_cell_t version of FS */
mawk_cell_t fs_shadow;
int nf; /* nf holds the true value of NF. If nf < 0 , then NF has not been computed, i.e., $0 has not been split */
HASHNODE *hash_table[HASH_PRIME];
HASHNODE *save_list; /* when processing user functions, global ids which are replaced by local ids are saved on this list */
unsigned last_hash;
/* large block allocator (memory accounting and free-later mechanism) */
mawk_mm_t *mawk_mm_head;
int mm_used, mm_max;
/* small block allocator in zmalloc.[ch] (pooling) */
ZBLOCK *pool[POOLSZ]; /* pool of blocks already free'd, indexed by size (in blocks) */
unsigned amt_avail; /* how many blocks are unclaimed at the end of ->avail */
ZBLOCK *avail; /* the chunk we split up for new allocations */
RE_NODE *re_list; /* a list of compiled regular expressions */
REPL_NODE *repl_list; /* here's our old friend linked linear list with move to the front for compilation of replacement CELLs */
char scan_code[256];
SPLIT_OV *split_ov_list;
libmawk_c_function *func_being_called; /* the C function that's being called back from execute() */
void *func_userdata; /* during calls to C functions, func_userdata has the value that it had during registration of that function (it's saved and restored) */
void *ctx_userdata; /* set by the user, never touched by libmawk */
int last_token_lineno; /* last token line number to detect source line change for adding debug info */
mawk_debug_callstack_t *debug_callstack;
FCALL_REC *resolve_list;
void *lvalp;
mawk_cell_t code_call_id_dummy;
mawk_cell_t bi_vars[NUM_BI_VAR];
/* regex lib state */
int REerrno;
mawk_RT_STATE *RE_run_stack_base;
mawk_RT_STATE *RE_run_stack_limit;
mawk_RT_STATE *RE_run_stack_empty; /* Large model DOS segment arithmetic breaks the current stack. This hack fixes it without rewriting the whole thing, 5/31/91 */
mawk_BV **REbv_base, **REbv_limit;
mawk_BV **REbv_next; /* next empty slot in the array */
int REbv_alloced;
int REprev;
unsigned RElen;
char *RElp; /* ptr to reg exp string */
unsigned long runlimit; /* how many instructions to run before returning; 0 means "unlimited" (2^32) */
/* option flags, each stored in a full int (should be a bitfield!) */
int debug_symbols; /* add location info and other debug symbol data to the code */
int separate_begin; /* if not zero, after running BEGIN blocks, no main block is automatically executed */
int suppress_undefined_function_warning; /* if not zero, do not warn about functions undefined */
int no_program_ok; /* it is ok if there's no program after processing argv[] */
int do_exit; /* non-zero if we should exit immediately (added for exiting from the parser) */
int wants_to_exit; /* non-zero if a script decided to exit but libmawk didn't really stop it (doesn't happen in main.c but happens with libmawk.c) */
int binary_loaded; /* non-zero if no text parsing is required (binary file has been loaded) */
/* hooks */
const char *(*file_name_rewrite)(const char *orig_name, char *buff, int buff_size, int type); /* called any time the script wants to open a new file (print redirection or getline); return orig_name or buff after filling in a new file name there or another string const (won't be freed); return NULL to deny opening the file */
mawk_vio_init_t vio_init;
FILE_NODE *fnode_stdin, *fnode_stdout, *fnode_stderr;
FBLOCK *c_funcs; /* list of c function calls - to be free'd on uninit */
};
/* one of two states; the other state, 0, is compiling */
#define EXECUTION    1

/* shorthands for the two members of the tempbuff scratch union of the
   state; both need a mawk_state_t pointer named MAWK in scope */
#define string_buff  (MAWK->tempbuff._string_buff)
#define split_buff   (MAWK->tempbuff._split_buff)
/* prototypes */
void mawk_cast1_to_str(mawk_state_t *, mawk_cell_t *);
void mawk_cast1_to_num(mawk_state_t *, mawk_cell_t *);
void mawk_cast2_to_str(mawk_state_t *, mawk_cell_t *);
void mawk_cast2_to_num(mawk_state_t *, mawk_cell_t *);
void mawk_cast_to_RE(mawk_state_t *, mawk_cell_t *);
void mawk_cast_for_split(mawk_state_t *, mawk_cell_t *);
void mawk_check_strnum(mawk_state_t *, mawk_cell_t *);
void mawk_cast_to_REPL(mawk_state_t *, mawk_cell_t *);
#define d_to_i(d) ((int)mawk_d_to_I(d))
int test(mawk_state_t *, mawk_cell_t *); /* test for null non-null */
mawk_cell_t *repl_cpy(mawk_cell_t *, mawk_cell_t *);
void DB_cell_destroy(mawk_state_t *, mawk_cell_t *);
void overflow(mawk_state_t *, char *, unsigned);
void mawk_rt_overflow(mawk_state_t * MAWK, char *, unsigned);
void mawk_rt_error(mawk_state_t *, const char *, ...);
void mawk_set_errno(mawk_state_t * MAWK, const char *error);
void mawk_exit_(mawk_state_t *, int);
/* mawk_exitval(MAWK, x, RETVAL): call mawk_exit_(MAWK, x), then return
   RETVAL from the function the macro is used in (for functions with a
   return value).
   mawk_exit(MAWK, x): same, for functions returning void.

   Both expand to a single do { } while(0) statement WITHOUT a trailing
   semicolon; the semicolon written by the caller terminates the statement.
   (With a semicolon inside the macro the expansion was two statements, so
   "if (cond) mawk_exit(MAWK, 1); else ..." did not compile.)
   MAWK and x are parenthesized so that any expression can be passed;
   RETVAL is pasted after "return" as is. */
#define mawk_exitval(MAWK, x, RETVAL) \
	do { \
		mawk_exit_((MAWK), (x)); \
		return RETVAL; \
	} while(0)

#define mawk_exit(MAWK, x) \
	do { \
		mawk_exit_((MAWK), (x)); \
		return; \
	} while(0)
/* code dump */
void mawk_da(mawk_state_t *MAWK, INST *, void *);

/* string and regex helpers */
char *mawk_str_str(char *, char *, unsigned);
char *mawk_rm_escape(mawk_state_t *MAWK, char *);
char *mawk_re_pos_match(mawk_state_t *MAWK, char *, PTR, unsigned *);

int mawk_binmode(void);

/* diagnostics */
void mawk_bozo(mawk_state_t *MAWK, char *);
void mawk_errmsg(mawk_state_t *MAWK, int, char *, ...);
void mawk_compile_error(mawk_state_t *MAWK, const char *, ...);
void mawk_overflow(mawk_state_t *MAWK, char *s, unsigned size);

/* execution */
void mawk_execute(mawk_state_t *MAWK, INST *, mawk_cell_t *, mawk_cell_t *);
const char *mawk_find_kw_str(int);

/* initialization of the builtin variables and functions */
void mawk_bi_vars_init(mawk_state_t *MAWK);
void mawk_bi_funct_init(mawk_state_t *MAWK);
/* Counterpart of mawk_bi_funct_init(), declared for pedantic-memory builds
   only. The guard was historically spelled MAKW_MEM_PEDANTIC; that spelling
   still works, and the presumably intended MAWK_MEM_PEDANTIC is accepted as
   well (TODO confirm which one the build system defines). The prototype is
   now terminated with ';' - without it, enabling the guard was a syntax
   error. */
#if defined(MAWK_MEM_PEDANTIC) || defined(MAKW_MEM_PEDANTIC)
void mawk_bi_funct_uninit(mawk_state_t *MAWK);
#endif
/* code initialization and the parser entry point; both take the state */
void mawk_code_init(mawk_state_t *MAWK);
void mawk_parse(mawk_state_t *MAWK);
/* When floats are enabled and HAVE_STRTOD_OVF_BUG is defined, every later
   use of strtod is redirected to strtod_with_ovf_bug(). */
#if !defined(MAWK_NO_FLOAT) && defined(HAVE_STRTOD_OVF_BUG)
double strtod_with_ovf_bug(const char *, char **);
#define strtod strtod_with_ovf_bug
#endif
#endif /* MAWK_H */