/********************************************
parse.y
libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
based on mawk code coming with the below copyright:
copyright 1991-94, Michael D. Brennan
This is a source file for mawk, an implementation of
the AWK programming language.
Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
/* $Log: parse.y,v $
* Revision 1.11 1995/06/11 22:40:09 mike
* change if(dump_code) -> if(dump_code_flag)
* cleanup of parse()
* add cast to shutup solaris cc compiler on char to int comparison
* switch_code_to_main() which cleans up outside_error production
*
* Revision 1.10 1995/04/21 14:20:21 mike
* move_level variable to fix bug in arglist patching of moved code.
*
* Revision 1.9 1995/02/19 22:15:39 mike
* Always set the call_offset field in a CA_REC (for obscure
* reasons in fcall.c (see comments) there.)
*
* Revision 1.8 1994/12/13 00:39:20 mike
* delete A statement to delete all of A at once
*
* Revision 1.7 1994/10/08 19:15:48 mike
* remove SM_DOS
*
* Revision 1.6 1993/12/01 14:25:17 mike
* reentrant array loops
*
* Revision 1.5 1993/07/22 00:04:13 mike
* new op code _LJZ _LJNZ
*
* Revision 1.4 1993/07/15 23:38:15 mike
* SIZE_T and indent
*
* Revision 1.3 1993/07/07 00:07:46 mike
* more work on 1.2
*
* Revision 1.2 1993/07/03 21:18:01 mike
* bye to yacc_mem
*
* Revision 1.1.1.1 1993/07/03 18:58:17 mike
* move source to cvs
*
* Revision 5.8 1993/05/03 01:07:18 mike
* fix mawk_bozo in LENGTH production
*
* Revision 5.7 1993/01/09 19:03:44 mike
* code_pop checks if the resolve_list needs relocation
*
* Revision 5.6 1993/01/07 02:50:33 mike
* relative vs absolute code
*
* Revision 5.5 1993/01/01 21:30:48 mike
* split mawk_new_STRING() into mawk_new_STRING and mawk_new_STRING0
*
* Revision 5.4 1992/08/08 17:17:20 brennan
* patch 2: improved timing of error recovery in
* bungled function definitions. Fixes a core dump
*
* Revision 5.3 1992/07/08 15:43:41 brennan
* patch2: length returns. I am a wimp
*
* Revision 5.2 1992/01/08 16:11:42 brennan
* code FE_PUSHA carefully for MSDOS large mode
*
* Revision 5.1 91/12/05 07:50:22 brennan
* 1.1 pre-release
*
*/
%pure-parser
%parse-param {mawk_state_t *MAWK}
%lex-param {mawk_state_t *MAWK}
%{
#include <stdio.h>
#include "mawk.h"
#include "types.h"
#include "symtype.h"
#include "code.h"
#include "memory.h"
#include "bi_funct.h"
#include "bi_vars.h"
#include "jmp.h"
#include "field.h"
#include "files.h"
#include "scan.h"
#include "zmalloc.h"
#include "f2d.h"
#define YYMAXDEPTH 200
void mawk_eat_nl(mawk_state_t * MAWK, YYSTYPE *lvalp);
static void resize_fblock(mawk_state_t *, FBLOCK *);
static void switch_code_to_main(mawk_state_t *);
static void mawk_code_array(mawk_state_t *, SYMTAB *);
static void mawk_code_call_id(mawk_state_t *, CA_REC *, SYMTAB *);
static void field_A2I(mawk_state_t *MAWK);
static void check_var(mawk_state_t *, SYMTAB *);
static void check_array(mawk_state_t *, SYMTAB *);
static void RE_as_arg(mawk_state_t *MAWK);
void mawk_parser_include(mawk_state_t *MAWK, void *str);
#define mawk_code_address(x) \
do { \
if (is_local(x)) \
mawk_code2op(MAWK, L_PUSHA, (x)->offset) ;\
else \
code2(MAWK, _PUSHA, (x)->stval.cp); \
} while(0)
#define CDP(x) (mawk_code_base+(x))
/* WARNING: These CDP() calculations become invalid after calls
that might change code_base. Which are: code2(), mawk_code2op(),
code_jmp() and code_pop().
*/
/* this nonsense caters to MSDOS large model */
#define CODE_FE_PUSHA() mawk_code_ptr->ptr = (PTR) 0 ; code1(FE_PUSHA)
%}
%union{
mawk_cell_t *cp ;
SYMTAB *stp ;
int start ; /* code starting address as offset from code_base */
PF_CP fp ; /* ptr to a (print/printf) or (sub/gsub) function */
const BI_REC *bip ; /* ptr to info about a builtin */
FBLOCK *fbp ; /* ptr to a function block */
ARG2_REC *arg2p ;
CA_REC *ca_p ;
int ival ;
PTR ptr ;
}
/* two tokens to help with errors */
%token UNEXPECTED /* unexpected character */
%token BAD_DECIMAL
%token NL
%token SEMI_COLON
%token LBRACE RBRACE
%token LBOX RBOX
%token COMMA
%token <ival> IO_OUT /* > or output pipe */
%right ASSIGN ADD_ASG SUB_ASG MUL_ASG DIV_ASG MOD_ASG POW_ASG
%right QMARK COLON
%left OR
%left AND
%left IN
%left <ival> MATCH /* ~ or !~ */
%left EQ NEQ LT LTE GT GTE
%left CAT
%left GETLINE
%left PLUS MINUS
%left MUL DIV MOD
%left NOT UMINUS
%nonassoc IO_IN PIPE
%right POW
%left <ival> INC_or_DEC
%left DOLLAR FIELD /* last to remove a SR conflict
with getline */
%right LPAREN RPAREN /* removes some SR conflicts */
%token <ptr> DOUBLE STRING_ RE
%token <stp> ID D_ID
%token <fbp> FUNCT_ID
%token <ptr> C_FUNCT_ID
%token <bip> BUILTIN LENGTH
%token <cp> FIELD
%token PRINT PRINTF SPLIT MATCH_FUNC SUB GSUB
/* keywords */
%token DO WHILE FOR BREAK CONTINUE IF ELSE IN
%token DELETE BEGIN END EXIT NEXT RETURN FUNCTION INCLUDE
%type <start> block block_or_separator
%type <start> statement_list statement mark
%type <ival> pr_args
%type <arg2p> arg2
%type <start> builtin
%type <start> getline_file
%type <start> lvalue lvalue_arrwr bifunct_target_arr field bifunct_target
%type <start> expr cat_expr p_expr
%type <start> while_front if_front
%type <start> for1 for2
%type <start> array_loop_front
%type <start> return_statement
%type <start> split_front re_arg sub_back
%type <ival> arglist args
%type <fp> print sub_or_gsub
%type <fbp> funct_start funct_head
%type <ca_p> call_args ca_front ca_back
%type <ival> f_arglist f_args
%%
/* productions */
program : program_block
| program program_block
;
program_block : PA_block /* pattern-action */
| function_def
| outside_error block
;
PA_block : block
{ /* this do nothing action removes a vacuous warning
from Bison */
}
| include
| NL /* allow newline anywhere between blocks - normally scan.c eats this up, but include introduces a corner case */
| BEGIN
{ mawk_be_setup(MAWK, MAWK->scope = SCOPE_BEGIN) ; }
block
{ switch_code_to_main(MAWK) ; }
| END
{ mawk_be_setup(MAWK, MAWK->scope = SCOPE_END) ; }
block
{ switch_code_to_main(MAWK) ; }
| expr /* this works just like an if statement */
{ mawk_code_jmp(MAWK, _JZ, (INST*)0) ; }
block_or_separator
{ mawk_patch_jmp(MAWK, mawk_code_ptr ) ; }
/* range pattern, see comment in mawk_execute.c near _RANGE */
| expr COMMA
{
INST *p1 = CDP($1) ;
int len ;
mawk_code_push(MAWK, p1, mawk_code_ptr - p1, MAWK->scope, MAWK->active_funct) ;
mawk_code_ptr = p1 ;
mawk_code2op(MAWK, _RANGE_CHK, 1) ;
mawk_code_ptr += 3 ;
len = mawk_code_pop(MAWK, mawk_code_ptr) ;
mawk_code_ptr += len ;
code1(_RANGE_STOP) ;
p1 = CDP($1) ;
p1[2].op = mawk_code_ptr - (p1+1) ;
}
expr
{ code1(_RANGE_STOP) ; }
block_or_separator
{
INST *p1 = CDP($1) ;
p1[3].op = CDP($6) - (p1+1) ;
p1[4].op = mawk_code_ptr - (p1+1) ;
}
;
block : LBRACE statement_list RBRACE
{ $$ = $2 ; }
| LBRACE error RBRACE
{ $$ = mawk_code_offset ; /* does nothing won't be mawk_executed */
MAWK->print_flag = MAWK->getline_flag = MAWK->paren_cnt = 0 ;
yyerrok ; }
;
block_or_separator : block
| separator /* default print action */
{ $$ = mawk_code_offset ;
code1(_PUSHINT) ; code1(0) ;
code2(MAWK, _PRINT, mawk_f2d(mawk_bi_print)) ;
}
statement_list : statement
| statement_list statement
;
statement : block
| expr separator
{ code1(_POP) ; }
| /* empty */ separator
{ $$ = mawk_code_offset ; }
| error separator
{ $$ = mawk_code_offset ;
MAWK->print_flag = MAWK->getline_flag = 0 ;
MAWK->paren_cnt = 0 ;
yyerrok ;
}
| BREAK separator
{ $$ = mawk_code_offset ; mawk_BC_insert(MAWK, 'B', mawk_code_ptr+1) ;
code2(MAWK, _JMP, 0) /* don't use mawk_code_jmp ! */ ; }
| CONTINUE separator
{ $$ = mawk_code_offset ; mawk_BC_insert(MAWK, 'C', mawk_code_ptr+1) ;
code2(MAWK, _JMP, 0) ; }
| return_statement
{ if ( MAWK->scope != SCOPE_FUNCT )
mawk_compile_error(MAWK, "return outside function body") ;
}
| NEXT separator
{ if ( MAWK->scope != SCOPE_MAIN )
mawk_compile_error(MAWK, "improper use of next" ) ;
$$ = mawk_code_offset ;
code1(_NEXT) ;
}
;
separator : NL | SEMI_COLON
;
expr : cat_expr
| lvalue ASSIGN expr { code1(_ASSIGN) ; }
| lvalue ADD_ASG expr { code1(_ADD_ASG) ; }
| lvalue SUB_ASG expr { code1(_SUB_ASG) ; }
| lvalue MUL_ASG expr { code1(_MUL_ASG) ; }
| lvalue DIV_ASG expr { code1(_DIV_ASG) ; }
| lvalue MOD_ASG expr { code1(_MOD_ASG) ; }
| lvalue POW_ASG expr { code1(_POW_ASG) ; }
| lvalue_arrwr ASSIGN expr { code1(_ASSIGN_ARR) ; }
| lvalue_arrwr ADD_ASG expr { code1(_ADD_ASG_ARR) ; }
| lvalue_arrwr SUB_ASG expr { code1(_SUB_ASG_ARR) ; }
| lvalue_arrwr MUL_ASG expr { code1(_MUL_ASG_ARR) ; }
| lvalue_arrwr DIV_ASG expr { code1(_DIV_ASG_ARR) ; }
| lvalue_arrwr MOD_ASG expr { code1(_MOD_ASG_ARR) ; }
| lvalue_arrwr POW_ASG expr { code1(_POW_ASG_ARR) ; }
| expr EQ expr { code1(_EQ) ; }
| expr NEQ expr { code1(_NEQ) ; }
| expr LT expr { code1(_LT) ; }
| expr LTE expr { code1(_LTE) ; }
| expr GT expr { code1(_GT) ; }
| expr GTE expr { code1(_GTE) ; }
| expr MATCH expr
{
INST *p3 = CDP($3) ;
if ( p3 == mawk_code_ptr - 2 )
{
if ( p3->op == _MATCH0 ) p3->op = _MATCH1 ;
else /* check for string */
if ( p3->op == _PUSHS )
{
mawk_cell_t *cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ;
cp->type = C_STRING ;
cp->ptr = p3[1].ptr ;
mawk_cast_to_RE(MAWK, cp) ;
mawk_code_ptr -= 2 ;
code2(MAWK, _MATCH1, cp->ptr) ;
MAWK_ZFREE(MAWK, cp) ;
}
else code1(_MATCH2) ;
}
else code1(_MATCH2) ;
if ( !$2 ) code1(_NOT) ;
}
/* short circuit boolean evaluation */
| expr OR
{ code1(_TEST) ;
mawk_code_jmp(MAWK, _LJNZ, (INST*)0) ;
}
expr
{ code1(_TEST) ; mawk_patch_jmp(MAWK, mawk_code_ptr) ; }
| expr AND
{ code1(_TEST) ;
mawk_code_jmp(MAWK, _LJZ, (INST*)0) ;
}
expr
{ code1(_TEST) ; mawk_patch_jmp(MAWK, mawk_code_ptr) ; }
| expr QMARK { mawk_code_jmp(MAWK, _JZ, (INST*)0) ; }
expr COLON { mawk_code_jmp(MAWK, _JMP, (INST*)0) ; }
expr
{ mawk_patch_jmp(MAWK, mawk_code_ptr) ; mawk_patch_jmp(MAWK, CDP($7)) ; }
;
cat_expr : p_expr %prec CAT
| cat_expr p_expr %prec CAT
{ code1(_CAT) ; }
;
p_expr : DOUBLE
{ $$ = mawk_code_offset ; code2(MAWK, _PUSHD, $1) ; }
| STRING_
{ $$ = mawk_code_offset ; code2(MAWK, _PUSHS, $1) ; }
| ID %prec AND /* anything less than IN */
{ check_var(MAWK, $1) ;
$$ = mawk_code_offset ;
if ( is_local($1) )
{ mawk_code2op(MAWK, L_PUSHI, $1->offset) ; }
else code2(MAWK, _PUSHI, $1->stval.cp) ;
}
| LPAREN expr RPAREN
{ $$ = $2 ; }
;
p_expr : RE
{ $$ = mawk_code_offset ; code2(MAWK, _MATCH0, $1) ; }
;
p_expr : p_expr PLUS p_expr { code1(_ADD) ; }
| p_expr MINUS p_expr { code1(_SUB) ; }
| p_expr MUL p_expr { code1(_MUL) ; }
| p_expr DIV p_expr { code1(_DIV) ; }
| p_expr MOD p_expr { code1(_MOD) ; }
| p_expr POW p_expr { code1(_POW) ; }
| NOT p_expr
{ $$ = $2 ; code1(_NOT) ; }
| PLUS p_expr %prec UMINUS
{ $$ = $2 ; code1(_UPLUS) ; }
| MINUS p_expr %prec UMINUS
{ $$ = $2 ; code1(_UMINUS) ; }
| builtin
;
p_expr : ID INC_or_DEC
{ check_var(MAWK, $1) ;
$$ = mawk_code_offset ;
mawk_code_address($1) ;
if ( $2 == '+' ) code1(_POST_INC) ;
else code1(_POST_DEC) ;
}
| INC_or_DEC lvalue
{ $$ = $2 ;
if ( $1 == '+' ) code1(_PRE_INC) ;
else code1(_PRE_DEC) ;
}
| INC_or_DEC lvalue_arrwr
{ $$ = $2 ;
if ( $1 == '+' ) code1(_PRE_INC_ARR) ;
else code1(_PRE_DEC_ARR) ;
}
;
p_expr : field INC_or_DEC
{ if ($2 == '+' ) code1(F_POST_INC ) ;
else code1(F_POST_DEC) ;
}
| INC_or_DEC field
{ $$ = $2 ;
if ( $1 == '+' ) code1(F_PRE_INC) ;
else code1( F_PRE_DEC) ;
}
;
lvalue : ID
{ $$ = mawk_code_offset ;
check_var(MAWK, $1) ;
mawk_code_address($1) ;
}
;
arglist : /* empty */
{ $$ = 0 ; }
| args
;
args : expr %prec LPAREN
{ $$ = 1 ; }
| args COMMA expr
{ $$ = $1 + 1 ; }
;
builtin :
BUILTIN mark LPAREN arglist RPAREN
{ const BI_REC *p = $1 ;
$$ = $2 ;
if ( (int)p->min_args > $4 || (int)p->max_args < $4 )
mawk_compile_error(
MAWK, "wrong number of arguments in call to %s" ,
p->name ) ;
if ( p->min_args != p->max_args ) /* variable args */
{ code1(_PUSHINT) ; code1($4) ; }
code2(MAWK, _BUILTIN , mawk_f2d(p->fp)) ;
}
| LENGTH /* this is an irritation */
{
$$ = mawk_code_offset ;
code1(_PUSHINT) ; code1(0) ;
code2(MAWK, _BUILTIN, mawk_f2d($1->fp)) ;
}
;
/* an empty production to store the mawk_code_ptr */
mark : /* empty */
{ $$ = mawk_code_offset ; }
/* print_statement */
statement : print mark pr_args pr_direction separator
{ code2(MAWK, _PRINT, mawk_f2d($1)) ;
if ( $1 == mawk_bi_printf && $3 == 0 )
mawk_compile_error(MAWK, "no arguments in call to printf") ;
MAWK->print_flag = 0 ;
$$ = $2 ;
}
;
print : PRINT { $$ = mawk_bi_print ; MAWK->print_flag = 1 ;}
| PRINTF { $$ = mawk_bi_printf ; MAWK->print_flag = 1 ; }
;
pr_args : arglist { mawk_code2op(MAWK, _PUSHINT, $1) ; }
| LPAREN arg2 RPAREN
{ $$ = $2->cnt ; mawk_zfree(MAWK, $2,sizeof(ARG2_REC)) ;
mawk_code2op(MAWK, _PUSHINT, $$) ;
}
| LPAREN RPAREN
{ $$=0 ; mawk_code2op(MAWK, _PUSHINT, 0) ; }
;
arg2 : expr COMMA expr
{ $$ = (ARG2_REC*) mawk_zmalloc(MAWK, sizeof(ARG2_REC)) ;
$$->start = $1 ;
$$->cnt = 2 ;
}
| arg2 COMMA expr
{ $$ = $1 ; $$->cnt++ ; }
;
pr_direction : /* empty */
| IO_OUT expr
{ mawk_code2op(MAWK, _PUSHINT, $1) ; }
;
/* IF and IF-ELSE */
if_front : IF LPAREN expr RPAREN
{ $$ = $3 ; mawk_eat_nl(MAWK, &yylval) ; mawk_code_jmp(MAWK, _JZ, (INST*)0) ; }
;
/* if_statement */
statement : if_front statement
{ mawk_patch_jmp(MAWK, mawk_code_ptr ) ; }
;
else : ELSE { mawk_eat_nl(MAWK, &yylval) ; mawk_code_jmp(MAWK, _JMP, (INST*)0) ; }
;
/* if_else_statement */
statement : if_front statement else statement
{ mawk_patch_jmp(MAWK, mawk_code_ptr) ;
mawk_patch_jmp(MAWK, CDP($4)) ;
}
/* LOOPS */
do : DO
{ mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ; }
;
/* do_statement */
statement : do statement WHILE LPAREN expr RPAREN separator
{ $$ = $2 ;
mawk_code_jmp(MAWK, _JNZ, CDP($2)) ;
mawk_BC_clear(MAWK, mawk_code_ptr, CDP($5)) ; }
;
while_front : WHILE LPAREN expr RPAREN
{ mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ;
$$ = $3 ;
/* check if const expression */
if ( mawk_code_ptr - 2 == CDP($3) &&
mawk_code_ptr[-2].op == _PUSHD &&
*(double*)mawk_code_ptr[-1].ptr != 0.0
)
mawk_code_ptr -= 2 ;
else
{ INST *p3 = CDP($3) ;
mawk_code_push(MAWK, p3, mawk_code_ptr-p3, MAWK->scope, MAWK->active_funct) ;
mawk_code_ptr = p3 ;
code2(MAWK, _JMP, (INST*)0) ; /* code2() not mawk_code_jmp() */
}
}
;
/* while_statement */
statement : while_front statement
{
int saved_offset ;
int len ;
INST *p1 = CDP($1) ;
INST *p2 = CDP($2) ;
if ( p1 != p2 ) /* real mawk_test in loop */
{
p1[1].op = mawk_code_ptr-(p1+1) ;
saved_offset = mawk_code_offset ;
len = mawk_code_pop(MAWK, mawk_code_ptr) ;
mawk_code_ptr += len ;
mawk_code_jmp(MAWK, _JNZ, CDP($2)) ;
mawk_BC_clear(MAWK, mawk_code_ptr, CDP(saved_offset)) ;
}
else /* while(1) */
{
mawk_code_jmp(MAWK, _JMP, p1) ;
mawk_BC_clear(MAWK, mawk_code_ptr, CDP($2)) ;
}
}
;
/* for_statement */
statement : for1 for2 for3 statement
{
int cont_offset = mawk_code_offset ;
unsigned len = mawk_code_pop(MAWK, mawk_code_ptr) ;
INST *p2 = CDP($2) ;
INST *p4 = CDP($4) ;
mawk_code_ptr += len ;
if ( p2 != p4 ) /* real mawk_test in for2 */
{
p4[-1].op = mawk_code_ptr - p4 + 1 ;
len = mawk_code_pop(MAWK, mawk_code_ptr) ;
mawk_code_ptr += len ;
mawk_code_jmp(MAWK, _JNZ, CDP($4)) ;
}
else /* for(;;) */
mawk_code_jmp(MAWK, _JMP, p4) ;
mawk_BC_clear(MAWK, mawk_code_ptr, CDP(cont_offset)) ;
}
;
for1 : FOR LPAREN SEMI_COLON { $$ = mawk_code_offset ; }
| FOR LPAREN expr SEMI_COLON
{ $$ = $3 ; code1(_POP) ; }
;
for2 : SEMI_COLON { $$ = mawk_code_offset ; }
| expr SEMI_COLON
{
if ( mawk_code_ptr - 2 == CDP($1) &&
mawk_code_ptr[-2].op == _PUSHD &&
* (double*) mawk_code_ptr[-1].ptr != 0.0
)
mawk_code_ptr -= 2 ;
else
{
INST *p1 = CDP($1) ;
mawk_code_push(MAWK, p1, mawk_code_ptr-p1, MAWK->scope, MAWK->active_funct) ;
mawk_code_ptr = p1 ;
code2(MAWK, _JMP, (INST*)0) ;
}
}
;
for3 : RPAREN
{ mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ;
mawk_code_push(MAWK, (INST*)0,0, MAWK->scope, MAWK->active_funct) ;
}
| expr RPAREN
{ INST *p1 = CDP($1) ;
mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ;
code1(_POP) ;
mawk_code_push(MAWK, p1, mawk_code_ptr - p1, MAWK->scope, MAWK->active_funct) ;
mawk_code_ptr -= mawk_code_ptr - p1 ;
}
;
/* arrays */
expr : expr IN ID
{ check_array(MAWK, $3) ;
mawk_code_array(MAWK, $3) ;
code1(A_TEST) ;
}
| LPAREN arg2 RPAREN IN ID
{ $$ = $2->start ;
mawk_code2op(MAWK, A_CAT, $2->cnt) ;
mawk_zfree(MAWK, $2, sizeof(ARG2_REC)) ;
check_array(MAWK, $5) ;
mawk_code_array(MAWK, $5) ;
code1(A_TEST) ;
}
;
/* array reference for a variable that is in the target (writable) part of
a bi_funct call such as gsub, sub or getline */
bifunct_target_arr : ID mark LBOX args RBOX
{
if ( $4 > 1 )
{ mawk_code2op(MAWK, A_CAT, $4) ; }
check_array(MAWK, $1) ;
if( is_local($1) )
{ mawk_code2op(MAWK, LAE_PUSHA, $1->offset) ; }
else code2(MAWK, AE_PUSHA, $1->stval.array) ;
$$ = $2 ;
}
;
lvalue_arrwr : ID mark LBOX args RBOX
{
if ( $4 > 1 )
{ mawk_code2op(MAWK, A_CAT, $4) ; }
check_array(MAWK, $1) ;
if( is_local($1) )
{ mawk_code2op(MAWK, LAE_PUSHA_WRARR, $1->offset) ; }
else code2(MAWK, AE_PUSHA_WRARR, $1->stval.array) ;
$$ = $2 ;
}
;
p_expr : ID mark LBOX args RBOX %prec AND
{
if ( $4 > 1 )
{ mawk_code2op(MAWK, A_CAT, $4) ; }
check_array(MAWK, $1) ;
if( is_local($1) )
{ mawk_code2op(MAWK, LAE_PUSHI, $1->offset) ; }
else code2(MAWK, AE_PUSHI, $1->stval.array) ;
$$ = $2 ;
}
| ID mark LBOX args RBOX INC_or_DEC
{
if ( $4 > 1 )
{ mawk_code2op(MAWK, A_CAT,$4) ; }
check_array(MAWK, $1) ;
if( is_local($1) )
{ mawk_code2op(MAWK, LAE_PUSHA_WRARR, $1->offset) ; }
else code2(MAWK, AE_PUSHA_WRARR, $1->stval.array) ;
if ( $6 == '+' ) code1(_POST_INC_ARR) ;
else code1(_POST_DEC_ARR) ;
$$ = $2 ;
}
;
/* delete A[i] or delete A */
statement : DELETE ID mark LBOX args RBOX separator
{
$$ = $3 ;
if ( $5 > 1 ) { mawk_code2op(MAWK, A_CAT, $5) ; }
check_array(MAWK, $2) ;
mawk_code_array(MAWK, $2) ;
code1(A_DEL) ;
}
| DELETE ID separator
{
$$ = mawk_code_offset ;
check_array(MAWK, $2) ;
mawk_code_array(MAWK, $2) ;
code1(DEL_A) ;
}
;
/* for ( i in A ) statement */
array_loop_front : FOR LPAREN ID IN ID RPAREN
{ mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ;
$$ = mawk_code_offset ;
check_var(MAWK, $3) ;
mawk_code_address($3) ;
check_array(MAWK, $5) ;
mawk_code_array(MAWK, $5) ;
code2(MAWK, SET_ALOOP, (INST*)0) ;
}
;
/* array_loop */
statement : array_loop_front statement
{
INST *p2 = CDP($2) ;
p2[-1].op = mawk_code_ptr - p2 + 1 ;
mawk_BC_clear(MAWK, mawk_code_ptr+2 , mawk_code_ptr) ;
mawk_code_jmp(MAWK, ALOOP, p2) ;
code1(POP_AL) ;
}
;
/* fields
D_ID is a special token , same as an ID, but yylex()
only returns it after a '$'. In essense,
DOLLAR D_ID is really one token.
*/
field : FIELD
{ $$ = mawk_code_offset ; code2(MAWK, F_PUSHA, $1) ; }
| DOLLAR D_ID
{ check_var(MAWK, $2) ;
$$ = mawk_code_offset ;
if ( is_local($2) )
{ mawk_code2op(MAWK, L_PUSHI, $2->offset) ; }
else code2(MAWK, _PUSHI, $2->stval.cp) ;
CODE_FE_PUSHA() ;
}
| DOLLAR D_ID mark LBOX args RBOX
{
if ( $5 > 1 )
{ mawk_code2op(MAWK, A_CAT, $5) ; }
check_array(MAWK, $2) ;
if( is_local($2) )
{ mawk_code2op(MAWK, LAE_PUSHI, $2->offset) ; }
else code2(MAWK, AE_PUSHI, $2->stval.array) ;
CODE_FE_PUSHA() ;
$$ = $3 ;
}
| DOLLAR p_expr
{ $$ = $2 ; CODE_FE_PUSHA() ; }
| LPAREN field RPAREN
{ $$ = $2 ; }
;
p_expr : field %prec CAT /* removes field (++|--) sr conflict */
{ field_A2I(MAWK) ; }
;
expr : field ASSIGN expr { code1(F_ASSIGN) ; }
| field ADD_ASG expr { code1(F_ADD_ASG) ; }
| field SUB_ASG expr { code1(F_SUB_ASG) ; }
| field MUL_ASG expr { code1(F_MUL_ASG) ; }
| field DIV_ASG expr { code1(F_DIV_ASG) ; }
| field MOD_ASG expr { code1(F_MOD_ASG) ; }
| field POW_ASG expr { code1(F_POW_ASG) ; }
;
/* split is handled different than a builtin because
it takes an array and optionally a regular expression as args */
p_expr : split_front split_back
{ code2(MAWK, _BUILTIN, mawk_f2d(mawk_bi_split)) ; }
;
split_front : SPLIT LPAREN expr COMMA ID
{ $$ = $3 ;
check_array(MAWK, $5) ;
mawk_code_array(MAWK, $5) ;
}
;
split_back : RPAREN
{ code2(MAWK, _PUSHI, &MAWK->fs_shadow) ; }
| COMMA expr RPAREN
{
if ( CDP($2) == mawk_code_ptr - 2 )
{
if ( mawk_code_ptr[-2].op == _MATCH0 )
RE_as_arg(MAWK) ;
else
if ( mawk_code_ptr[-2].op == _PUSHS )
{ mawk_cell_t *cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ;
cp->type = C_STRING ;
cp->ptr = mawk_code_ptr[-1].ptr ;
mawk_cast_for_split(MAWK, cp) ;
mawk_code_ptr[-2].op = _PUSHC ;
mawk_code_ptr[-1].ptr = (PTR) cp ;
}
}
}
;
/* match(expr, RE) */
p_expr : MATCH_FUNC LPAREN expr COMMA re_arg RPAREN
{ $$ = $3 ;
code2(MAWK, _BUILTIN, mawk_f2d(mawk_bi_match)) ;
}
;
re_arg : expr
{
INST *p1 = CDP($1) ;
if ( p1 == mawk_code_ptr - 2 )
{
if ( p1->op == _MATCH0 ) RE_as_arg(MAWK) ;
else
if ( p1->op == _PUSHS )
{ mawk_cell_t *cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ;
cp->type = C_STRING ;
cp->ptr = p1[1].ptr ;
mawk_cast_to_RE(MAWK, cp) ;
p1->op = _PUSHC ;
p1[1].ptr = (PTR) cp ;
}
}
}
/* exit_statement */
statement : EXIT separator
{ $$ = mawk_code_offset ;
code1(_EXIT0) ; }
| EXIT expr separator
{ $$ = $2 ; code1(_EXIT) ; }
return_statement : RETURN separator
{ $$ = mawk_code_offset ;
code1(_RET0) ; }
| RETURN expr separator
{ $$ = $2 ; code1(_RET) ; }
/* getline */
p_expr : getline %prec GETLINE
{ $$ = mawk_code_offset ;
code2(MAWK, F_PUSHA, &MAWK->field[0]) ;
code1(_PUSHINT) ; code1(0) ;
code2(MAWK, _BUILTIN, mawk_f2d(mawk_bi_getline)) ;
MAWK->getline_flag = 0 ;
}
| getline bifunct_target %prec GETLINE
{ $$ = $2 ;
code1(_PUSHINT) ; code1(0) ;
code2(MAWK, _BUILTIN, mawk_f2d(mawk_bi_getline)) ;
MAWK->getline_flag = 0 ;
}
| getline_file p_expr %prec IO_IN
{ code1(_PUSHINT) ; code1(F_IN) ;
code2(MAWK, _BUILTIN, mawk_f2d(mawk_bi_getline)) ;
/* getline_flag already off in yylex() */
}
| p_expr PIPE GETLINE
{ code2(MAWK, F_PUSHA, &MAWK->field[0]) ;
code1(_PUSHINT) ; code1(PIPE_IN) ;
code2(MAWK, _BUILTIN, mawk_f2d(mawk_bi_getline)) ;
}
| p_expr PIPE GETLINE bifunct_target
{
code1(_PUSHINT) ; code1(PIPE_IN) ;
code2(MAWK, _BUILTIN, mawk_f2d(mawk_bi_getline)) ;
}
;
getline : GETLINE { MAWK->getline_flag = 1 ; }
/* gsub(), sub() and getline has to modify one of their args; the arg has
to be a bifunct_target for reference pass */
bifunct_target : lvalue | bifunct_target_arr | field ;
getline_file : getline IO_IN
{ $$ = mawk_code_offset ;
code2(MAWK, F_PUSHA, MAWK->field+0) ;
}
| getline bifunct_target IO_IN
{ $$ = $2 ; }
;
/*==========================================
sub and gsub
==========================================*/
p_expr : sub_or_gsub LPAREN re_arg COMMA expr sub_back
{
INST *p5 = CDP($5) ;
INST *p6 = CDP($6) ;
if ( p6 - p5 == 2 && p5->op == _PUSHS )
{ /* cast from STRING to REPL at compile time */
mawk_cell_t *cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ;
cp->type = C_STRING ;
cp->ptr = p5[1].ptr ;
mawk_cast_to_REPL(MAWK, cp) ;
p5->op = _PUSHC ;
p5[1].ptr = (PTR) cp ;
}
code2(MAWK, _BUILTIN, mawk_f2d($1)) ;
$$ = $3 ;
}
;
sub_or_gsub : SUB { $$ = mawk_bi_sub ; }
| GSUB { $$ = mawk_bi_gsub ; }
;
sub_back : RPAREN /* substitute into $0 */
{ $$ = mawk_code_offset ;
code2(MAWK, F_PUSHA, &MAWK->field[0]) ;
}
| COMMA bifunct_target RPAREN
{ $$ = $2 ; }
;
/*================================================
user defined functions
*=================================*/
function_def : funct_start block
{
resize_fblock(MAWK, $1) ;
mawk_restore_ids(MAWK) ;
switch_code_to_main(MAWK) ;
}
;
funct_start : funct_head LPAREN f_arglist RPAREN
{ mawk_eat_nl(MAWK, &yylval) ;
MAWK->scope = SCOPE_FUNCT ;
MAWK->active_funct = $1 ;
*MAWK->main_code_p = MAWK->active_code ;
$1->nargs = $3 ;
if ( $3 )
$1->typev = (char *)
memset( mawk_zmalloc(MAWK, $3), ST_LOCAL_NONE, $3) ;
else $1->typev = (char *) 0 ;
mawk_code_ptr = mawk_code_base =
(INST *) mawk_zmalloc(MAWK, INST_BYTES(PAGESZ));
mawk_code_limit = mawk_code_base + PAGESZ ;
mawk_code_warn = mawk_code_limit - CODEWARN ;
}
;
funct_head : FUNCTION ID
{ FBLOCK *fbp ;
if ( $2->type == ST_NONE )
{
$2->type = ST_FUNCT ;
fbp = $2->stval.fbp =
(FBLOCK *) mawk_zmalloc(MAWK, sizeof(FBLOCK)) ;
fbp->name = $2->name ;
fbp->code = (INST*) 0 ;
}
else
{
mawk_type_error(MAWK, $2 ) ;
/* this FBLOCK will not be put in
the symbol table */
fbp = (FBLOCK*) mawk_zmalloc(MAWK, sizeof(FBLOCK)) ;
fbp->name = "" ;
}
$$ = fbp ;
}
| FUNCTION FUNCT_ID
{ $$ = $2 ;
if ( $2->code )
mawk_compile_error(MAWK, "redefinition of %s" , $2->name) ;
}
;
f_arglist : /* empty */ { $$ = 0 ; }
| f_args
;
f_args : ID
{ $1 = mawk_save_id(MAWK, $1->name) ;
$1->type = ST_LOCAL_NONE ;
$1->offset = 0 ;
$$ = 1 ;
}
| f_args COMMA ID
{ if ( is_local($3) )
mawk_compile_error(MAWK, "%s is duplicated in argument list",
$3->name) ;
else
{ $3 = mawk_save_id(MAWK, $3->name) ;
$3->type = ST_LOCAL_NONE ;
$3->offset = $1 ;
$$ = $1 + 1 ;
}
}
;
outside_error : error
{ /* we may have to recover from a bungled function
definition */
/* can have local ids, before code scope
changes */
mawk_restore_ids(MAWK) ;
switch_code_to_main(MAWK) ;
}
;
/* a call to a user defined function */
p_expr : FUNCT_ID mark call_args
{ $$ = $2 ;
code2(MAWK, _CALL, $1) ;
if ( $3 ) code1($3->arg_num+1) ;
else code1(0) ;
mawk_check_fcall(MAWK, $1, MAWK->scope, MAWK->ps.code_move_level, MAWK->active_funct, $3, MAWK->token_lineno);
}
;
p_expr : C_FUNCT_ID mark call_args
{ $$ = $2 ;
code2(MAWK, _CALL, $1) ;
if ( $3 ) code1($3->arg_num+1) ;
else code1(0) ;
mawk_check_ccall(MAWK, $1, MAWK->scope, MAWK->ps.code_move_level, MAWK->active_funct, $3, MAWK->token_lineno);
}
;
call_args : LPAREN RPAREN
{ $$ = (CA_REC *) 0 ; }
| ca_front ca_back
{ $$ = $2 ;
$$->link = $1 ;
$$->arg_num = $1 ? $1->arg_num+1 : 0 ;
}
;
/* The funny definition of ca_front with the COMMA bound to the ID is to
force a shift to avoid a reduce/reduce conflict
ID->id or ID->array
Or to avoid a decision, if the type of the ID has not yet been
determined
*/
ca_front : LPAREN
{ $$ = (CA_REC *) 0 ; }
| ca_front expr COMMA
{ $$ = MAWK_ZMALLOC(MAWK, CA_REC) ;
$$->link = $1 ;
$$->type = CA_EXPR ;
$$->arg_num = $1 ? $1->arg_num+1 : 0 ;
$$->call_offset = mawk_code_offset ;
}
| ca_front ID COMMA
{ $$ = MAWK_ZMALLOC(MAWK, CA_REC) ;
$$->link = $1 ;
$$->arg_num = $1 ? $1->arg_num+1 : 0 ;
mawk_code_call_id(MAWK, $$, $2) ;
}
;
ca_back : expr RPAREN
{ $$ = MAWK_ZMALLOC(MAWK, CA_REC) ;
$$->type = CA_EXPR ;
$$->call_offset = mawk_code_offset ;
}
| ID RPAREN
{ $$ = MAWK_ZMALLOC(MAWK, CA_REC) ;
mawk_code_call_id(MAWK, $$, $1) ;
}
;
include:
INCLUDE STRING_ { mawk_parser_include(MAWK, $2); }
;
multi_nl:
NL
| multi_nl NL
;
%%
/* resize the code for a user function */
static void resize_fblock(mawk_state_t *MAWK, FBLOCK *fbp)
{
CODEBLOCK *p = MAWK_ZMALLOC(MAWK, CODEBLOCK) ;
mawk_code2op(MAWK, _RET0, _HALT) ;
/* make sure there is always a return */
*p = MAWK->active_code ;
fbp->code = mawk_code_shrink(MAWK, p, &fbp->size) ;
/* mawk_code_shrink() zfrees p */
/* this list is alos used to free functions in pedantic mode */
#ifndef MAWK_MEM_PEDANTIC
if ( MAWK->dump_code_flag )
#endif
mawk_add_to_fdump_list(MAWK, fbp) ;
/* printf("CODE add: %p/%d\n", fbp->code, fbp->size);*/
}
/* convert FE_PUSHA to FE_PUSHI
or F_PUSH to F_PUSHI
*/
static void field_A2I(mawk_state_t *MAWK)
{
mawk_cell_t *cp;
if ( mawk_code_ptr[-1].op == FE_PUSHA &&
mawk_code_ptr[-1].ptr == (PTR) 0)
/* On most architectures, the two mawk_tests are the same; a good
compiler might eliminate one. On LM_DOS, and possibly other
segmented architectures, they are not */
{ mawk_code_ptr[-1].op = FE_PUSHI ; }
else
{
cp = (mawk_cell_t *) mawk_code_ptr[-1].ptr ;
if (cp == MAWK->field || (cp > MAWK_NF && cp <= LAST_PFIELD))
{
mawk_code_ptr[-2].op = _PUSHI ;
}
else if ( cp == MAWK_NF )
{ mawk_code_ptr[-2].op = NF_PUSHI ; mawk_code_ptr-- ; }
else
{
mawk_code_ptr[-2].op = F_PUSHI ;
mawk_code_ptr -> op = mawk_field_addr_to_index(MAWK, mawk_code_ptr[-1].ptr ) ;
mawk_code_ptr++ ;
}
}
}
/* we've seen an ID in a context where it should be a VAR,
check that's consistent with previous usage */
static void check_var(mawk_state_t *MAWK, register SYMTAB *p)
{
switch(p->type)
{
case ST_NONE : /* new id */
p->type = ST_VAR ;
p->stval.cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ;
p->stval.cp->type = C_NOINIT ;
break ;
case ST_LOCAL_NONE :
p->type = ST_LOCAL_VAR ;
MAWK->active_funct->typev[p->offset] = ST_LOCAL_VAR ;
break ;
case ST_VAR :
case ST_LOCAL_VAR : break ;
default :
mawk_type_error(MAWK, p) ;
break ;
}
}
/* we've seen an ID in a context where it should be an ARRAY,
check that's consistent with previous usage */
static void check_array(mawk_state_t *MAWK, register SYMTAB *p)
{
switch(p->type)
{
case ST_NONE : /* a new array */
p->type = ST_ARRAY ;
p->stval.array = mawk_array_new(MAWK, NULL) ;
break ;
case ST_ARRAY :
case ST_LOCAL_ARRAY :
break ;
case ST_LOCAL_NONE :
p->type = ST_LOCAL_ARRAY ;
MAWK->active_funct->typev[p->offset] = ST_LOCAL_ARRAY ;
break ;
default : mawk_type_error(MAWK, p) ; break ;
}
}
static void mawk_code_array(mawk_state_t *MAWK, register SYMTAB *p)
{
if ( is_local(p) ) mawk_code2op(MAWK, LA_PUSHA, p->offset) ;
else code2(MAWK, A_PUSHA, p->stval.array) ;
}
/* we've seen an ID as an argument to a user defined function */
static void mawk_code_call_id(mawk_state_t *MAWK, register CA_REC *p, register SYMTAB *ip)
{
p->call_offset = mawk_code_offset ;
/* This always get set now. So that fcall:relocate_arglist
works. */
switch( ip->type )
{
case ST_VAR :
p->type = CA_EXPR ;
code2(MAWK, _PUSHI, ip->stval.cp) ;
break ;
case ST_LOCAL_VAR :
p->type = CA_EXPR ;
mawk_code2op(MAWK, L_PUSHI, ip->offset) ;
break ;
case ST_ARRAY :
p->type = CA_ARRAY ;
code2(MAWK, A_PUSHA, ip->stval.array) ;
break ;
case ST_LOCAL_ARRAY :
p->type = CA_ARRAY ;
mawk_code2op(MAWK, LA_PUSHA, ip->offset) ;
break ;
/* not enough info to code it now; it will have to
be patched later */
case ST_NONE :
p->type = ST_NONE ;
p->sym_p = ip ;
code2(MAWK, _PUSHI, &MAWK->code_call_id_dummy) ;
break ;
case ST_LOCAL_NONE :
p->type = ST_LOCAL_NONE ;
p->type_p = & MAWK->active_funct->typev[ip->offset] ;
mawk_code2op(MAWK, L_PUSHI, ip->offset) ;
break ;
#ifdef DEBUG
default :
mawk_bozo(MAWK, "mawk_code_call_id") ;
#endif
}
}
/* an RE by itself was coded as _MATCH0 , change to
push as an expression */
static void RE_as_arg(mawk_state_t *MAWK)
{
mawk_cell_t *cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ;
mawk_code_ptr -= 2 ;
cp->type = C_RE ;
cp->ptr = mawk_code_ptr[1].ptr ;
code2(MAWK, _PUSHC, cp) ;
}
/* reset the active_code back to the MAIN block */
static void switch_code_to_main(mawk_state_t *MAWK)
{
switch(MAWK->scope)
{
case SCOPE_BEGIN :
*MAWK->begin_code_p = MAWK->active_code ;
MAWK->active_code = *MAWK->main_code_p ;
break ;
case SCOPE_END :
*MAWK->end_code_p = MAWK->active_code ;
MAWK->active_code = *MAWK->main_code_p ;
break ;
case SCOPE_FUNCT :
MAWK->active_code = *MAWK->main_code_p ;
break ;
case SCOPE_MAIN :
break ;
}
MAWK->active_funct = (FBLOCK*) 0 ;
MAWK->scope = SCOPE_MAIN ;
}
void mawk_parse(mawk_state_t *MAWK)
{
if (!MAWK->binary_loaded) {
if ( yyparse(MAWK) || MAWK->compile_error_count != 0 ) mawk_exit(MAWK, 2) ;
mawk_scan_cleanup(MAWK) ;
mawk_set_code(MAWK) ;
/* code must be set before call to mawk_resolve_fcalls() */
if ( MAWK->resolve_list ) mawk_resolve_fcalls(MAWK) ;
}
if ( MAWK->compile_error_count != 0 ) mawk_exit(MAWK, 2) ;
if ( MAWK->dump_code_flag ) { mawk_dump_code(MAWK);}
if ( MAWK->dump_sym_flag ) { mawk_dump_sym_text(MAWK); }
if ((MAWK->dump_code_flag ) || ( MAWK->dump_sym_flag )) { mawk_exit(MAWK, 0); }
(void)mawk_d2f(NULL); /* suppress compiler warning */
}
void mawk_parser_include(mawk_state_t *MAWK, void *str)
{
mawk_parser_push(MAWK);
MAWK->ps.eof_flag = 0 ;
MAWK->ps.pfile_name = ((mawk_string_t *)str)->str;
MAWK->ps.buffp = MAWK->ps.buffer = (unsigned char *) mawk_zmalloc(MAWK, BUFFSZ + 1) ;
*MAWK->ps.buffp = '\0';
if (mawk_scan_open(MAWK) == 1)
MAWK->token_lineno = MAWK->lineno = 1 ;
else
mawk_parser_pop(MAWK);
}