Codebase list libmawk / debian/1.0.0-1 src / libmawk / mawk.h
debian/1.0.0-1

Tree @debian/1.0.0-1 (Download .tar.gz)

mawk.h @debian/1.0.0-1raw · history · blame

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
/********************************************
mawk.h

libmawk changes (C) 2009-2012, Tibor 'Igor2' Palinkas;
based on mawk code coming with the below copyright:

copyright 1991-94, Michael D. Brennan

This is a source file for mawk, an implementation of
the AWK programming language.

Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/

#ifndef  MAWK_H
#define  MAWK_H

#include <libmawk/conf.h>
#include <stdlib.h>
#include <libmawk/nstd.h>

/* forward declaration: handle type for struct mawk_vio_s, which is not
   defined in this header */
typedef struct mawk_vio_s mawk_vio_t;


/* libmawk error codes: 0 for MAWK_ESUCCES (sic, that is how the identifier
   is spelled), a distinct negative value for every other code.
   MAWK_Elast has the same value as the most negative code in the list. */
typedef enum mawk_errno_e {
	MAWK_ESUCCES    =  0,
	MAWK_ECANTOPEN  = -1,
	MAWK_EHDRSIZE   = -2,
	MAWK_EFILEMAGIC = -3,
	MAWK_EBYTEORDER = -4,
	MAWK_EVERSION   = -5,
	MAWK_EINSTSIZE  = -6,
	MAWK_ENUMSIZE   = -7,
	MAWK_EALLOC     = -8,
	MAWK_EWRONGVAL  = -9,

	/* keep this equal to the last real code above */
	MAWK_Elast      = MAWK_EWRONGVAL
} mawk_errno_t;

/* maps an error code to a string (presumably a human readable description;
   the implementation is not in this header) */
const char *mawk_strerror(mawk_errno_t err);

/* forward declarations so that the headers included below can refer to
   these types by pointer; struct mawk_state_s is defined later in this
   header, struct fcall is not defined here */
typedef struct mawk_state_s mawk_state_t;
typedef struct fcall  FCALL_REC;

#include <libmawk/types.h>
#include <libmawk/bi_vars.h>
#include <libmawk/vio.h>

/* number of extra cells reserved after the FBANK_SZ regular cells in
   mawk_state_s.field[] */
#define  NUM_PFIELDS		5

/* size in bytes of the scratch union MAWK->tempbuff; can only be used where
   a variable named MAWK (pointer to the state) is in scope. The expansion is
   fully parenthesized so that it always behaves as a single operand, no
   matter which operators surround the macro at the point of use. */
#define  SPRINTF_SZ	(sizeof(MAWK->tempbuff))

/* One program file entry; entries are chained through ->link
   (mawk_state_s keeps pfile_list / pfile_list_tail pointers of this type). */
typedef struct pfile {
	struct pfile *link;   /* another entry of the chain (presumably the next one) */
	const char *fname;    /* file name */
	char bytecode;        /* 1 if file is expected to be bytecode */
} PFILE;

/* buffered input handle; struct mawk_fin_s is not defined in this header */
typedef struct mawk_fin_s mawk_input_t;

/* NOTE: mawk_array_t is a POINTER type (to struct array, defined below) */
typedef struct array  *mawk_array_t;

/* array implementation callbacks; default implementation is in array_orig.c */

/* Look up index idx in array arr and return 1 if it exists. If result is
   non-NULL, it is first destroyed (regardless of whether the index exists in
   the array) and if the index exists, its value is copied into result. The
   caller has to destroy result after the call. Modifications to result will
   not affect the array.
   NOTE: idx may be the same pointer as result: it's guaranteed that result is
   destroyed after indexing.

   If create is 1, a non-existing member is created with empty value.
   */
typedef int mawk_array_find_t(mawk_state_t *MAWK, mawk_array_t arr, const mawk_cell_t *idx, mawk_cell_t *result, int create);

/* set the member of array arr at index idx to val */
typedef void mawk_array_set_t(mawk_state_t *MAWK, mawk_array_t arr, const mawk_cell_t *idx, mawk_cell_t *val);

/* delete a single index (idx) from the array; called by "delete A[i]" */
typedef void mawk_array_delete_t(mawk_state_t *MAWK, mawk_array_t arr, const mawk_cell_t *idx);

/* free all elements of the array and clean the array; the array should
   be a valid empty array after the operation */
typedef void mawk_array_clear_t(mawk_state_t *MAWK, mawk_array_t arr);

/* list and return all indices in a string array; used by "for(a in ARR)";
   must return a pointer alloced by mawk_malloc() or NULL if the array is
   empty. vsize is an output parameter (presumably the number of entries in
   the returned vector - confirm against the implementation). */
typedef mawk_string_t **mawk_array_loop_vector_t(mawk_state_t *MAWK, mawk_array_t arr, unsigned *vsize);

/* load the array from MAWK->split_ov_list (called exclusively from split()) */
typedef void mawk_array_load_t(mawk_state_t *MAWK, mawk_array_t arr, int cnt);

/* optional calls for _generic: iteration; no change is done to the array
   during the iteration, except for deleting the last returned element in
   clear() */
/* start iterating over all members; returns the iterator that has to be
   passed to the next/stop calls */
typedef void *mawk_array_it_start_t(mawk_state_t *MAWK, mawk_array_t arr);
/* get index of the next member; returns NULL at the end */
typedef const mawk_cell_t *mawk_array_it_next_t(mawk_state_t *MAWK, mawk_array_t arr, void *iterator);
/* called after the last element or if the iteration is to be stopped */
typedef void mawk_array_it_stop_t(mawk_state_t *MAWK, mawk_array_t arr, void *iterator);


/* A whole array implementation consists of the callbacks declared above,
   collected in one table of function pointers. find/set/delet are mandatory;
   for the rest an implementation provides either group A or group B. */
typedef struct array_imp_s {
	/* any array implementation has to provide these: */
	mawk_array_find_t           *find;
	mawk_array_set_t            *set;
	mawk_array_delete_t         *delet;
	/* option A: manual implementation of these: */
	mawk_array_clear_t          *clear;
	mawk_array_loop_vector_t    *loop_vect;
	mawk_array_load_t           *load;
	/* option B: use array_generic and provide an iterator: */
	mawk_array_it_start_t       *it_start;
	mawk_array_it_next_t        *it_next;
	mawk_array_it_stop_t        *it_stop;
} array_imp_t;

/* an actual array; mawk_array_t is a pointer to this struct. Each array
   carries its own copy of the implementation callback table. */
struct array {
	array_imp_t imp;              /* implementation */
	PTR ptr;											/* What this points to depends on the type and implementation */
	unsigned size;								/* number of elts in the table */
	unsigned limit;								/* Meaning depends on type and implementation */
	unsigned hmask;								/* bitwise and with hash value to get table index */
	short type;										/* values in AY_NULL .. AY_SPLIT */

	union {                       /* state for custom (non-orig) implementations; 0 or NULL by default */
		int i;
		void *p;
	} state;
};

/* Four INST pointers describing a block of code.
   NOTE(review): judging by the names, base/limit presumably delimit the
   allocated block, warn is a threshold inside it and ptr the current
   position - confirm against the code generator. */
typedef struct {
	INST *base;
	INST *limit;
	INST *warn;
	INST *ptr;
} CODEBLOCK;

/*------------------------
  user defined functions
  ------------------------*/

typedef struct fblock FBLOCK;

/* function block: describes one function (name, code, arguments) */
struct fblock {
	const char *name;
	INST *code;
	unsigned size; /* allocated size for proper cleanup with zfree() */
	unsigned short nargs;
	char *typev;									/* array of size nargs holding types */

	FBLOCK *c_next; /* linked list of all c function call blocks compiled into the code so they can be free'd at the end; NOTE(review): the original comment pointed to "MAWK->c_calls", but the only matching field visible in mawk_state_s is c_funcs - confirm */
};												/* function block */

/* Node of a singly linked list (see jmp_top in mawk_state_s) that
   remembers a source offset. */
typedef struct jmp {
	struct jmp *link;     /* another node of the list */
	int source_offset;    /* the remembered offset */
} JMP;

/* Stack node, the stack being kept as a linked list (see bc_top in
   mawk_state_s). */
typedef struct bc {
	/* stack as linked list */
	struct bc *link;
	/* 'B' or 'C' or mark start with 0 */
	int type;
	/* position of _JMP  */
	int source_offset;
} BC;

/* a stack to hold some pieces of code while
   reorganizing loops; kept as a linked list (see mc_top in mawk_state_s)
*/

typedef struct mc {							/* mc -- move code */
	struct mc *link;
	INST *code;										/* the save code */
	unsigned len;									/* its length */
	int scope;										/* its scope */
	int move_level;								/* size of this stack when coded */
	FBLOCK *fbp;									/* if scope FUNCT */
	int offset;										/* distance from its code base */
} MC;

/* Record of one child process, chained into a list through ->link
   (mawk_state_s.child_list keeps dead children on such a list). */
struct child {
	int pid;              /* process id of the child */
	int exit_status;      /* its exit status */
	struct child *link;   /* another record of the list */
};

/* DANGER is SAFETY cells below EVAL_STACK_SIZE (the latter is defined
   elsewhere) */
#define	 SAFETY	   16
#define	 DANGER	   (EVAL_STACK_SIZE - SAFETY)

/* mawk_state_s.escape_test[] has ET_END + 1 entries of mawk_escape_t */
#define	 ET_END	    9

/* a pair of characters: in and out */
typedef struct {
	char in;
	char out;
} mawk_escape_t;

/* linked list node referencing one function block; mawk_state_s.fdump_list
   is a list of these holding all user functions */
struct mawk_fdump {
	struct mawk_fdump *link;
	FBLOCK *fbp;
};

/* We store dynamically created files on a linked linear
   list with move to the front (big surprise).
   A node carries the file name, a type and two I/O handles: vf for direct
   I/O and fin for buffered input. */

typedef struct file_node_s {
	struct file_node_s *link;
	mawk_string_t *name;
	short type;

	/* direct file IO for output or direct input */
	mawk_vio_t *vf;

	/* .. or buffered (FIN) */
	mawk_input_t *fin;
} FILE_NODE;

/* description of a separator (mawk_state_s.rs_shadow is of this type and
   describes how to split based on RS) */
typedef struct {
	char type;
	char c;
	PTR ptr;											/* mawk_string_t* or RE machine* */
} SEPARATOR;

/* struct to hold info about builtins */
typedef struct {
	char *name;
	PF_CP fp;											/* ptr to function that does the builtin */
	unsigned char min_args, max_args;
/* min_args/max_args: info for parser to check correct number of arguments */
} BI_REC;

/* symbol table entry: a name, a type and a type dependent value (stval).
   NOTE(review): which stval member is valid presumably depends on type -
   confirm against the symbol table code. */
typedef struct {
	const char *name;
	char type;
	unsigned char offset;					/* offset in stack frame for local vars */
	union {
		mawk_cell_t *cp;
		int kw;
		PF_CP fp;
		const BI_REC *bip;
		mawk_array_t array;
		FBLOCK *fbp;
		struct {
			/* same signature as libmawk_c_function */
			mawk_cell_t *(*callback) (mawk_state_t * context, mawk_cell_t * sp, int a_args);
			void *func_userdata;
		} c_function;
	} stval;
} SYMTAB;

/* a symbol table entry chained into a linked list; mawk_state_s.hash_table[]
   holds pointers to such nodes */
typedef struct hash {
	struct hash *link;
	SYMTAB symtab;
} HASHNODE;

/* Small block allocator constants.

   ZBLOCKS of sizes 1, 2, ... 16
   which is bytes of sizes 8, 16, ... , 128
   are stored on the linked linear lists in
   pool[0], pool[1], ... , pool[15]
*/
#define POOLSZ	    16   /* number of pools */
#define ZBLOCKSZ    8    /* bytes in one ZBLOCK */
#define ZSHIFT      3    /* 1 << ZSHIFT == ZBLOCKSZ */

/* one block: either ZBLOCKSZ bytes of payload or a list link */
typedef union zblock {
	char dummy[ZBLOCKSZ];
	union zblock *link;
} ZBLOCK;

/* list node pairing a string with a PTR named re; mawk_state_s.re_list is
   a list of compiled regular expressions built from these */
typedef struct re_node {
	mawk_string_t *sval;
	PTR re;
	struct re_node *link;
} RE_NODE;

/* list node mapping an input string to an output cell; used for the
   compilation of replacements (see repl_list in mawk_state_s) */
typedef struct repl_node {
	struct repl_node *link;
	mawk_string_t *sval;									/* the input */
	mawk_cell_t *cp;											/* the output */
} REPL_NODE;

/* list node holding one string (see split_ov_list in mawk_state_s, which
   the array load callback reads) */
typedef struct spov {
	struct spov *link;
	mawk_string_t *sval;
} SPLIT_OV;

/* ---------------------------------------------------------------------- */

typedef struct mawk_debug_callstack_s mawk_debug_callstack_t;

/* one entry of the debug call stack: a function block and a link to another
   entry (see debug_callstack in mawk_state_s) */
struct mawk_debug_callstack_s {
	FBLOCK *f;
	mawk_debug_callstack_t *next;
};

/* Parse state; mawk_state_s holds the current one in ps and a stack of them
   (mawk_parser_stack) for "include". */
typedef struct mawk_parse_state_s {
	const char *pfile_name;				/* program input file */
	char pfile_bytecode; /* 1 if program input file is expected to be bytecode */
	int code_move_level;					/* used as part of unique identification of context when moving code.  Global for communication with parser. */
	mawk_string_t *program_string;
	unsigned char *buffer;
	unsigned char *buffp;
	/* buffer and buffp are unsigned so it works with 8 bit chars */
	FILE_NODE *program_fin;
	int eof_flag;
} mawk_parse_state_t;


typedef struct mawk_mm_s mawk_mm_t;

/* Header of one block of the large block allocator (see mawk_mm_head in
   mawk_state_s): doubly linked through prev/next, followed by the data. */
struct mawk_mm_s {
	mawk_mm_t *prev;
	mawk_mm_t *next;
	int size;
	/* actual data; declared with one element */
	char data[1];
};


/* regex lib */

/* bit vector: 32 bytes */
typedef unsigned char mawk_BV[32];

/* one state of a regex machine: a type tag, a length and type dependent
   data */
typedef struct {
	char type;
	/* used for M_STR  */
	unsigned char len;
	union {
		/* string */
		char *str;
		/*  class  */
		mawk_BV *bvp;
		int jump;
	} data;
} mawk_RESTATE;

/* function callback type: this is how execute() can call external C
   functions. The callback gets the state (context), a cell pointer sp and
   an int a_args, and returns a cell pointer.
   NOTE(review): sp is presumably the evaluation stack pointer and a_args the
   number of actual arguments - confirm against execute(). */
typedef mawk_cell_t *libmawk_c_function(mawk_state_t *context, mawk_cell_t * sp, int a_args);

/* struct for the run time stack of the regex lib (see RE_run_stack_base in
   mawk_state_s): one saved machine state */
typedef struct {
	mawk_RESTATE *m;							/*   save the machine ptr */
	int u;												/*   save the u_flag */
	char *s;											/*   save the active string ptr */
	char *ss;											/*   save the match start -- only used by mawk_REmatch */
} mawk_RT_STATE;								/* run time state */

/* The complete state of one mawk context: parser/scanner state, generated
   code, evaluation stack, fields, symbol table, allocators, regex lib state,
   flags and hooks. Most functions declared in this header take a pointer to
   it; several macros in this header expect that pointer to be named MAWK. */
struct mawk_state_s {

#ifdef   DEBUG
#define  YYDEBUG  1
	int yydebug;									/* print parse if on */
	int dump_RE;
#endif

	short posix_space_flag, interactive_flag;

/* a well known string */
	mawk_string_t null_str;

/* a useful scratch area; accessed through the string_buff and split_buff
   macros, sized by SPRINTF_SZ */
	union {
		mawk_string_t *_split_buff[MAX_SPLIT];
		char _string_buff[MIN_SPRINTF];
	} tempbuff;


	/* help with casts */
	unsigned long *mpow2;

	mawk_cell_t field[FBANK_SZ + NUM_PFIELDS];
	/* $0, $1 ... $(MAX_SPLIT), NF, RS, FS, CONVFMT, OFMT
	   NOTE(review): the original comment listed "RS" twice; the second one
	   presumably meant FS - confirm */

	/* more fields if needed go here */
	mawk_cell_t *fbank[NUM_FBANK];				/* fbank[0] == field */


	/* these are used by the parser, scanner and error messages
	   from the compile  */

	PFILE *pfile_list, *pfile_list_tail;

	int current_token;
	unsigned token_lineno;				/* lineno of current token */
	unsigned compile_error_count;
	int paren_cnt, brace_cnt;
	int print_flag, getline_flag;
	short mawk_state;
	char *progname;								/* for error messages */
	unsigned rt_nr, rt_fnr;				/* ditto (for error messages) */

	/* this can be moved and enlarged  by -W sprintf=num  */
	char *sprintf_buff;
	char *sprintf_limit;

	FILE_NODE *main_input;
	mawk_array_t Argv;										/* to the user this is ARGV  */
	mawk_num_t argi;										/* index of next ARGV[argi] to try to open */
	unsigned lineno;
	int NR_flag;									/* are we tracking NR */

	CODEBLOCK active_code;
	CODEBLOCK *main_code_p, *begin_code_p, *end_code_p;
	INST *begin_start, *main_start, *end_start, *end_start_orig;
	unsigned begin_size, main_size, end_size;
	INST *execution_start;

	int dump_code_flag;						/* if on dump internal code */
	int dump_sym_flag;						/* if on dump internal symbols */

	INST *restart_label;					/* control flow labels */
	INST *next_label;
	mawk_cell_t tc;											/* useful temp */
	int scope;
	FBLOCK *active_funct;					/* when scope is SCOPE_FUNCT  */
	JMP *jmp_top;
	BC *bc_top;
	MC *mc_top;

	mawk_parse_state_t ps;				/* current parse state */
	mawk_array_t scripts_loaded;					/* a hash indexed by full paths of scripts already loaded */
	mawk_parse_state_t *mawk_parser_stack;	/* parse state stack for "include" */
	int pstack_alloced, pstack_used;

	int check_progress;						/* flag that indicates call_arg_check() was able to type check some call arguments */
	struct child *child_list;			/* dead children are kept on this list */
	unsigned repl_cnt;						/* number of global replacements */
	long seed;										/* must be >=1 and < 2^31-1 */
	mawk_cell_t cseed;										/* argument of last call to srand() */
	mawk_cell_t eval_stack[EVAL_STACK_SIZE];
	mawk_cell_t *sp;
	mawk_cell_t *stack_base;							/* these can move for deep recursion */
	mawk_cell_t *stack_danger;
	int exit_code, final_exit_code, rt_exit_code;
#	ifdef   HAVE_STRTOD_OVF_BUG
	fp_except entry_mask;
	fp_except working_mask;
#	endif
	mawk_escape_t escape_test[ET_END + 1];
	struct mawk_fdump *fdump_list;	/* linked list of all user functions */
	FILE_NODE *file_list;
	char *shell;									/* hardwire to /bin/sh for portability of programs */

	int max_field;								/* maximum field actually created */
/* a description of how to split based on RS.
   If RS is changed, so is rs_shadow */
	SEPARATOR rs_shadow;
	/* a splitting mawk_cell_t version of FS */
	mawk_cell_t fs_shadow;
	int nf;												/* nf holds the true value of NF.  If nf < 0 , then NF has not been computed, i.e., $0 has not been split */
	HASHNODE *hash_table[HASH_PRIME];
	HASHNODE *save_list;					/* when processing user functions,  global ids which are replaced by local ids are saved on this list */
	unsigned last_hash;

	/* large block allocator (memory accounting and free-later mechanism) */
	mawk_mm_t *mawk_mm_head;
	int mm_used, mm_max;

	/* small block allocator in zmalloc.[ch] (pooling) */
	ZBLOCK *pool[POOLSZ];         /* pool of blocks already free'd, indexed by size (in blocks) */
	unsigned amt_avail;           /* how many blocks are unclaimed at the end of ->avail */
	ZBLOCK *avail;                /* the chunk we split up for new allocations */

	RE_NODE *re_list;							/* a list of compiled regular expressions */
	REPL_NODE *repl_list;					/* here's our old friend linked linear list with move to the front for compilation of replacement CELLs   */
	char scan_code[256];
	SPLIT_OV *split_ov_list;

	libmawk_c_function *func_being_called; /* the C function that's being called back from execute() */

	void *func_userdata;                /* during calls to C functions, func_userdata has the value that it had during registration of that function (it's saved and restored) */
	void *ctx_userdata;                 /* set by the user, never touched by libmawk */

	int last_token_lineno;				/* last token line number to detect source line change for adding debug info */
	mawk_debug_callstack_t *debug_callstack;

	FCALL_REC *resolve_list;
	void *lvalp;
	mawk_cell_t code_call_id_dummy;

	mawk_cell_t bi_vars[NUM_BI_VAR];


	/* regex lib state */
	int REerrno;
	mawk_RT_STATE *RE_run_stack_base;
	mawk_RT_STATE *RE_run_stack_limit;
	mawk_RT_STATE *RE_run_stack_empty; /* Large model DOS segment arithmetic breaks the current stack. This hack fixes it without rewriting the whole thing, 5/31/91 */
	mawk_BV **REbv_base, **REbv_limit;
	mawk_BV **REbv_next;               /* next empty slot in the array */
	int REbv_alloced;
	int REprev;
	unsigned RElen;
	char *RElp;                        /*  ptr to reg exp string  */

	unsigned long runlimit;            /* how many instructions to run before returning; 0 means "unlimited" (2^32) */

	/* the flags below should be a bitfield! */
	int debug_symbols;						/* add location info and other debug symbol data to the code */
	int separate_begin;						/* if not zero, after running BEGIN blocks, no main block is automatically executed */
	int suppress_undefined_function_warning;	/* if not zero, do not warn about functions undefined */
	int no_program_ok;            /* it is ok if there's no program after processing argv[] */

	int do_exit;									/* non-zero if we should exit immediately (added for exiting from the parser) */
	int wants_to_exit;            /* non-zero if a script decided to exit but libmawk didn't really stop it (doesn't happen in main.c but happens with libmawk.c) */
	int binary_loaded;            /* non-zero if no text parsing is required (binary file has been loaded) */

	/* hooks */
	const char *(*file_name_rewrite)(const char *orig_name, char *buff, int buff_size, int type); /* called any time the script wants to open a new file (print redirection or getline); return orig_name or buff after filling in a new file name there or another string const (won't be freed); return NULL to deny opening the file */
	mawk_vio_init_t vio_init;

	FILE_NODE *fnode_stdin, *fnode_stdout, *fnode_stderr;
	FBLOCK *c_funcs; /* list of c function calls - to be free'd on uninit */
};

/* NOTE(review): presumably a value of the mawk_state field of mawk_state_s -
   confirm */
#define EXECUTION       1				/* other state is 0 compiling */

	/* shorthands for the members of the tempbuff scratch union of
	   mawk_state_s, so they can be used as if the union were anonymous;
	   both require a state pointer named MAWK in scope */
#define  string_buff	MAWK->tempbuff._string_buff
#define  split_buff	MAWK->tempbuff._split_buff


/*  prototypes  */

/* Cell conversion routines; each takes the state and a cell pointer and
   returns nothing.
   NOTE(review): judging by the names, the cast1 variants presumably convert
   one cell and the cast2 variants two - confirm against the implementation. */
void mawk_cast1_to_str(mawk_state_t *MAWK, mawk_cell_t *cp);
void mawk_cast1_to_num(mawk_state_t *MAWK, mawk_cell_t *cp);
void mawk_cast2_to_str(mawk_state_t *MAWK, mawk_cell_t *cp);
void mawk_cast2_to_num(mawk_state_t *MAWK, mawk_cell_t *cp);
void mawk_cast_to_RE(mawk_state_t *MAWK, mawk_cell_t *cp);
void mawk_cast_for_split(mawk_state_t *MAWK, mawk_cell_t *cp);
void mawk_check_strnum(mawk_state_t *MAWK, mawk_cell_t *cp);
void mawk_cast_to_REPL(mawk_state_t *MAWK, mawk_cell_t *cp);

/* result of mawk_d_to_I(d) (declared elsewhere) converted to int */
#define d_to_i(d)     ((int)mawk_d_to_I(d))


/* NOTE: test, repl_cpy, DB_cell_destroy and overflow carry no mawk_ prefix,
   so they can clash with identifiers of the host application */
int test(mawk_state_t *, mawk_cell_t *);	/* test for null non-null */
mawk_cell_t *repl_cpy(mawk_cell_t *, mawk_cell_t *);
void DB_cell_destroy(mawk_state_t *, mawk_cell_t *);
void overflow(mawk_state_t *, char *, unsigned);
void mawk_rt_overflow(mawk_state_t * MAWK, char *, unsigned);
/* takes a const char * followed by variable arguments (presumably a
   printf-style format - confirm) */
void mawk_rt_error(mawk_state_t *, const char *, ...);
void mawk_set_errno(mawk_state_t * MAWK, const char *error);


/* low level exit call; normally used through the mawk_exit() and
   mawk_exitval() macros, which pass the state and x (presumably the exit
   code) and then return from the calling function */
void mawk_exit_(mawk_state_t *, int);
/* mawk_exitval(MAWK, x, RETVAL): call mawk_exit_(MAWK, x), then return
   RETVAL from the calling function (for functions returning a value).
   mawk_exit(MAWK, x): same, with a plain "return;" (for void functions).

   Both expand to a do { } while(0) WITHOUT a trailing semicolon: the caller
   writes the semicolon, so an invocation is exactly one statement and can be
   used in an unbraced if/else. (With a semicolon inside the macro,
   "if (c) mawk_exit(M, x); else ..." expanded to two statements and the else
   lost its if.) Arguments are parenthesized so that any expression can be
   passed. */
#define mawk_exitval(MAWK, x, RETVAL) \
	do { \
		mawk_exit_((MAWK), (x)); \
		return (RETVAL); \
	} while(0)

#define mawk_exit(MAWK, x) \
	do { \
		mawk_exit_((MAWK), (x)); \
		return; \
	} while(0)

/* miscellaneous helpers implemented elsewhere; only the signatures are
   visible here */
void mawk_da(mawk_state_t *, INST *, void *);
char *mawk_str_str(char *, char *, unsigned);
char *mawk_rm_escape(mawk_state_t *, char *);
char *mawk_re_pos_match(mawk_state_t *, char *, PTR, unsigned *);
int mawk_binmode(void);


/* error reporting; mawk_errmsg and mawk_compile_error take a string followed
   by variable arguments (presumably a printf-style format - confirm) */
void mawk_bozo(mawk_state_t *, char *);
void mawk_errmsg(mawk_state_t *, int, char *, ...);
void mawk_compile_error(mawk_state_t *, const char *, ...);

/* takes the state, an INST pointer and two cell pointers
   NOTE(review): presumably runs the code starting at the INST pointer -
   confirm against the implementation */
void mawk_execute(mawk_state_t *, INST *, mawk_cell_t *, mawk_cell_t *);
/* maps an int (presumably a keyword token) to a string */
const char *mawk_find_kw_str(int);

void mawk_overflow(mawk_state_t * MAWK, char *s, unsigned size);

/* initialization entry points, each taking the state */
void mawk_bi_vars_init(mawk_state_t * MAWK);
void mawk_bi_funct_init(mawk_state_t * MAWK);
/* The uninit counterpart is only declared in pedantic memory builds.
   The guard used to test only the misspelled MAKW_MEM_PEDANTIC and the
   prototype lacked its terminating semicolon, so enabling it broke every
   file including this header. Both spellings are accepted now to stay
   compatible with builds that define the old name. */
#if defined(MAWK_MEM_PEDANTIC) || defined(MAKW_MEM_PEDANTIC)
void mawk_bi_funct_uninit(mawk_state_t *MAWK);
#endif
void mawk_code_init(mawk_state_t *MAWK);
void mawk_parse(mawk_state_t *);

/* Unless MAWK_NO_FLOAT is defined: when HAVE_STRTOD_OVF_BUG is defined,
   every use of strtod after this point is redirected to
   strtod_with_ovf_bug(), which has the same signature as strtod(). */
#ifndef MAWK_NO_FLOAT
#	ifdef HAVE_STRTOD_OVF_BUG
		double strtod_with_ovf_bug(const char *, char **);
#		define strtod  strtod_with_ovf_bug
#	endif
#endif

#endif /* MAWK_H */