/********************************************
scan.c
libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
based on mawk code coming with the below copyright:
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the AWK programming language.
Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
/* help text editors to find out nesting without context */
#define CL_BRACE '{'
#define _POSIX_SOURCE
#define _BSD_SOURCE
#include "mawk.h"
#include <stdio.h>
#include <limits.h>
#include <string.h>
#include "scan.h"
#include "memory.h"
#include "field.h"
#include "init.h"
#include "fin.h"
#include "repl.h"
#include "code.h"
#include "bi_vars.h"
#include "vio.h"
#include "da_bin_helper.h"
#ifndef PATH_MAX
#define PATH_MAX 1024
#endif
#ifndef NO_FCNTL_H
#include <fcntl.h>
#endif
#include "files.h"
/* forward declarations (all file-local except mawk_scan_open) */
static void scan_fillbuff(mawk_state_t * MAWK);
int mawk_scan_open(mawk_state_t * MAWK);
static int slow_next(mawk_state_t * MAWK);
static void eat_comment(mawk_state_t * MAWK);
static void eat_semi_colon(mawk_state_t * MAWK);
static mawk_num_t collect_decimal(mawk_state_t *, YYSTYPE *lvalp, int, int *);
static int collect_string(mawk_state_t *MAWK, YYSTYPE *lvalp);
static int collect_RE(mawk_state_t *MAWK, YYSTYPE *lvalp);
/*-----------------------------
program file management
*----------------------------*/
/* Prepare the scanner for the main program text.
   cmdline_program: the program text given on the command line, or NULL to
   load the script named by MAWK->ps.pfile_name through mawk_scan_open().
   Returns 1 if no more processing is needed (mawk_scan_open() returned 2,
   i.e. the script was a binary file), -1 if MAWK->do_exit is set after
   opening the script, 0 otherwise. */
int mawk_scan_init(mawk_state_t *MAWK, char *cmdline_program)
{
if (cmdline_program) {
MAWK->ps.program_fin = NULL; /* command line program */
/* one byte longer than the text so that a trailing newline fits */
MAWK->ps.program_string = mawk_new_STRING0(MAWK, strlen(cmdline_program) + 1);
strcpy(MAWK->ps.program_string->str, cmdline_program);
/* simulate file termination */
/* (this overwrites the NUL strcpy() wrote at index len - 1; presumably
   mawk_new_STRING0() keeps a terminator at str[len] - TODO confirm) */
MAWK->ps.program_string->str[MAWK->ps.program_string->len - 1] = '\n';
MAWK->ps.buffp = (unsigned char *) MAWK->ps.program_string->str;
/* the whole program is already in memory: nothing to refill */
MAWK->ps.eof_flag = 1;
}
else { /* program from file[s] */
/* loading the script can not fail yet, as this is the first file */
if (mawk_scan_open(MAWK) == 2) {
/* NOTE(review): buffp has not been set up in this branch yet;
   confirm that un_next() is harmless here */
un_next();
return 1;
}
/* the read buffer is allocated only once and reused afterwards */
if (MAWK->ps.buffer == NULL)
MAWK->ps.buffer = (unsigned char *) mawk_zmalloc(MAWK, BUFFSZ + 1);
MAWK->ps.buffp = MAWK->ps.buffer;
if (MAWK->do_exit)
return -1;
scan_fillbuff(MAWK);
}
#ifdef OS2 /* OS/2 "extproc" is similar to #! */
if (strnicmp(MAWK->ps.buffp, "extproc ", 8) == 0)
eat_comment(MAWK);
#endif
/* NULL lvalp: there is no token value to fill in at this point */
mawk_eat_nl(MAWK, NULL); /* scan to first token */
/* a 0 character right after the leading white space means end of input */
if (next(MAWK) == 0) {
/* no program */
mawk_exitval(MAWK, 0, -1);
}
un_next();
return 0;
}
/* Open the script named by MAWK->ps.pfile_name.

   "-" means standard input. Any other name is tried in each directory of
   the ';' separated search path stored in LIBPATH; the search path is
   ignored when the name starts with '/'. A leading '+' on the name is
   stripped and requests that a script already recorded in
   MAWK->scripts_loaded is not loaded again. When MAWK->ps.pfile_bytecode is
   set the file is loaded with mawk_load_code_bin() instead of being opened
   for scanning.

   Returns:
    1  the script was opened as text; MAWK->ps.program_fin is set
    2  the script was loaded as bytecode; MAWK->binary_loaded is set
    0  the script was a duplicate and ignored (normal condition) or it could
       not be opened (the error is reported before returning) */
int mawk_scan_open(mawk_state_t * MAWK)
{																/* open pfile_name */
	if (MAWK->ps.pfile_name[0] == '-' && MAWK->ps.pfile_name[1] == 0) {
		MAWK->ps.program_fin = mawk_file_find_(MAWK, "/dev/stdin", F_IN, 1);
		/* report success, otherwise a caller that loops until it gets 1
		   (slow_next()) would skip a script read from stdin */
		if (MAWK->ps.program_fin != NULL)
			return 1;
	}
	else {
		const char *fn;
		char *start, *end, *osp;
		int len, nlen, plen, uniq;
		char bc;
		char path[PATH_MAX];
#ifdef mawk_realpath
		char rpath[PATH_MAX];
#else
#	define rpath path
#endif
		mawk_cell_t idx;

		fn = MAWK->ps.pfile_name;
		bc = MAWK->ps.pfile_bytecode;
		if (*fn == '+') {
			fn++;
			uniq = 1;
		}
		else
			uniq = 0;
		nlen = strlen(fn);

		osp = ((mawk_string_t *) LIBPATH->ptr)->str;
		if ((osp == NULL) || (*osp == '\0') || (*fn == '/'))
			osp = "";

		/* iterate over the ';' separated directories; an empty search path
		   yields a single iteration with an empty prefix */
		for (start = osp; start != NULL; start = (end != NULL) ? end + 1 : NULL) {
			end = strchr(start, ';');
			if (end == NULL)
				len = strlen(start);
			else
				len = end - start;

			/* a non-empty directory gets a '/' appended */
			plen = (len > 0) ? len + 1 : 0;

			/* check BEFORE writing to path[]; >= leaves room for the
			   terminating '\0' */
			if ((size_t) plen + (size_t) nlen >= sizeof(path)) {
				/* show as much of the directory as fits, for the message only */
				size_t shown = ((size_t) len < sizeof(path) - 1) ? (size_t) len : sizeof(path) - 1;

				memcpy(path, start, shown);
				path[shown] = '\0';
				mawk_errmsg(MAWK, errno, "cannot load awk script - path too long ('%s' '%s' vs %d bytes)", path, MAWK->ps.pfile_name,
										(int) sizeof(path));
				mawk_exitval(MAWK, 2, -1);
			}

			if (len > 0) {
				memcpy(path, start, len);
				path[len] = '/';
			}
			memcpy(path + plen, fn, nlen);
			path[plen + nlen] = '\0';

#ifdef mawk_realpath
			if (mawk_realpath(path, rpath) == NULL) {
				mawk_errmsg(MAWK, errno, "cannot convert to realpath ('%s')", path);
				mawk_exitval(MAWK, 2, -1);
			}
#endif

			/* the (real) path is the key in the scripts_loaded array */
			idx.type = C_STRING;
			idx.ptr = mawk_new_STRING(MAWK, rpath);

			if (uniq) {
				/* we assume if an entry is in the array, the file exists,
				   as we already could load it once and there is no reason to assume
				   it has disappeared meanwhile - well, in theory this is a race
				   condition as another process may have deleted the file, but then
				   we would fail with "not found" anyway. */
				if (mawk_array_find(MAWK, MAWK->scripts_loaded, &idx, NULL, 0) != 0) {
					free_STRING((mawk_string_t *)idx.ptr);
					return 0;
				}
			}

			MAWK->binary_loaded = 0;
			if (bc) {
				if (mawk_load_code_bin(MAWK, path) != 0) {
					mawk_errmsg(MAWK, 0, "failed to load or link binary script %s", path);
					/* do not leak the key on the error path */
					free_STRING((mawk_string_t *)idx.ptr);
					mawk_exitval(MAWK, 2, -1);
					return 0;
				}
				MAWK->binary_loaded = 1;
				free_STRING((mawk_string_t *)idx.ptr);
				return 2;
			}
			else if ((MAWK->ps.program_fin = mawk_file_find_(MAWK, path, F_IN, 1)) != NULL) {
				mawk_cell_t one;

				/* remember the script so that later '+' loads can skip it */
				one.type = C_NUM;
				one.d.dval = MAWK_NUM_ONE;
				mawk_array_set(MAWK, MAWK->scripts_loaded, &idx, &one);
				free_STRING((mawk_string_t *)idx.ptr);
				return 1;
			}
			free_STRING((mawk_string_t *)idx.ptr);
		}
		/* none of the directories had the script */
		mawk_errmsg(MAWK, errno, "cannot open script %s (used search path %s)", fn, osp);
		mawk_exitval(MAWK, 2, -1);
	}
	return 0;
}
/* Release the resources the scanner holds for the program text (read
   buffer, command line program string, program file) and adjust the
   scan_code[] entries of the white space characters. */
void mawk_scan_cleanup(mawk_state_t * MAWK)
{
	/* slow_next() closes program_fin and sets it to NULL at end of file, so
	   whether the read buffer must be released is decided by the buffer
	   pointer itself; testing program_fin would leak the buffer */
	if (MAWK->ps.buffer != NULL) {
		mawk_zfree(MAWK, MAWK->ps.buffer, BUFFSZ + 1);
		MAWK->ps.buffer = NULL;
		MAWK->ps.buffp = NULL;
	}

	/* a command line program is the only case that has a program string
	   and no program file */
	if (MAWK->ps.program_fin == NULL && MAWK->ps.program_string != NULL) {
		free_STRING(MAWK->ps.program_string);
		/* do not leave a dangling pointer behind */
		MAWK->ps.program_string = NULL;
	}

	if (MAWK->ps.program_fin != NULL) {
		mawk_file_close_(MAWK, MAWK->ps.program_fin);
		MAWK->ps.program_fin = NULL;
	}

	/* redefine SPACE as [ \t\n] */
	MAWK->scan_code['\n'] = (MAWK->posix_space_flag && MAWK->rs_shadow.type != SEP_MLR) ? SC_UNEXPECTED : SC_SPACE;
	MAWK->scan_code['\f'] = SC_UNEXPECTED;	/*value doesn't matter */
	MAWK->scan_code['\013'] = SC_UNEXPECTED;	/* \v not space */
	MAWK->scan_code['\r'] = SC_UNEXPECTED;
}
/*----------------------------------------
 file reading functions
 next() and un_next(c) are macros in scan.h
 *---------------------*/

/* Load the next chunk of the program file into MAWK->ps.buffer. A short
   read (or no program file at all) marks end of file and terminates the
   data with a newline followed by a 0 byte. */
static void scan_fillbuff(mawk_state_t * MAWK)
{
	unsigned char *buf = MAWK->ps.buffer;
	unsigned got = 0;

	if (MAWK->ps.program_fin != NULL)
		got = mawk_fillbuff(MAWK, MAWK->ps.program_fin->fin, (char *) buf, BUFFSZ, 0);

	/* a full buffer means there may be more to read */
	if (got >= BUFFSZ)
		return;

	MAWK->ps.eof_flag = 1;
	/* make sure eof is terminated */
	buf[got] = '\n';
	buf[got + 1] = 0;
}
/* read one character -- slowly */
/* Runs when the current character is the 0 terminator of the buffer, and
   loops until a non-zero character is available or every program source is
   exhausted. Returns the character (0 at the real end of the input) and
   advances MAWK->ps.buffp past it. */
static int slow_next(mawk_state_t * MAWK)
{
while (*MAWK->ps.buffp == 0) {
if (!MAWK->ps.eof_flag) {
/* more data in the current file: refill from the start of the buffer */
MAWK->ps.buffp = MAWK->ps.buffer;
scan_fillbuff(MAWK);
}
else {
PFILE *q;
/* the current source is finished */
if (MAWK->ps.program_fin != NULL) {
mawk_file_close_(MAWK, MAWK->ps.program_fin);
MAWK->ps.program_fin = NULL;
}
/* resume a saved parser state if there is one; otherwise (return value 0)
   continue with the next entry of the program file list */
if (mawk_parser_pop(MAWK) == 0) {
MAWK->ps.eof_flag = 0;
do {
if (MAWK->pfile_list != NULL) {
/* unlink the head of the list and make it the current script */
MAWK->ps.pfile_name = MAWK->pfile_list->fname;
MAWK->ps.pfile_bytecode = MAWK->pfile_list->bytecode;
q = MAWK->pfile_list;
MAWK->pfile_list = MAWK->pfile_list->link;
MAWK_ZFREE(MAWK, q);
}
else {
/* nothing left to read */
MAWK->ps.eof_flag = 1;
goto real_eof;
}
/* any result other than 1 (0: skipped or failed, 2: bytecode) moves on
   to the next list entry */
} while (mawk_scan_open(MAWK) != 1);
/* a new file starts at line 1; buffp still points at a 0 byte and eof_flag
   is 0, so the next iteration of the outer loop fills the buffer */
MAWK->token_lineno = MAWK->lineno = 1;
}
}
}
real_eof:;
return *MAWK->ps.buffp++; /* note can un_next() , eof which is zero */
}
/* Skip the rest of a comment: consume characters up to a newline or a
   character whose scan code is 0, then push that terminator back so the
   caller sees it. */
static void eat_comment(mawk_state_t * MAWK)
{
	int ch;

	for (;;) {
		ch = next(MAWK);
		if (ch == '\n')
			break;
		if (MAWK->scan_code[ch] == 0)
			break;
	}
	un_next();
}
/* Extra semi-colons are now allowed to separate pattern-action blocks.
   They are useless clutter to the language, so we simply throw them away.

   Skips white space, then eats one semi-colon on the current line; any
   other character is pushed back. */
static void eat_semi_colon(mawk_state_t * MAWK)
{
	int ch;

	do {
		ch = next(MAWK);
	} while (MAWK->scan_code[ch] == SC_SPACE);

	if (ch == ';')
		return;
	un_next();
}
/* Eat all white space including newlines and comments, counting lines.
   lvalp: token value slot, used only to report a stray backslash; it may
   be NULL (mawk_scan_init() passes NULL), in which case a local scratch
   value is used. */
void mawk_eat_nl(mawk_state_t * MAWK, YYSTYPE *lvalp)
{																/* eat all space including newlines */
	YYSTYPE scratch;

	/* the error path below stores into *lvalp, so never leave it NULL */
	if (lvalp == NULL)
		lvalp = &scratch;

	while (1)
		switch (MAWK->scan_code[next(MAWK)]) {
		case SC_COMMENT:
			eat_comment(MAWK);
			break;

		case SC_NL:
			MAWK->lineno++;
			/* fall thru  */

		case SC_SPACE:
			break;

		case SC_ESCAPE:
			/* bug fix - surprised anyone did this,
			   a csh user with backslash dyslexia.(Not a joke)
			 */
			{
				unsigned c;

				while (MAWK->scan_code[c = next(MAWK)] == SC_SPACE);
				if (c == '\n')
					/* backslash-newline: a line continuation */
					MAWK->token_lineno = ++MAWK->lineno;
				else if (c == 0) {
					un_next();
					return;
				}
				else {									/* error */
					un_next();
					/* can't un_next() twice so deal with it */
					lvalp->ival = '\\';
					mawk_unexpected_char(MAWK, lvalp);
					if (++(MAWK->compile_error_count) == MAX_COMPILE_ERRORS)
						mawk_exit(MAWK, 2);
					return;
				}
			}
			break;

		default:
			/* first character that is not white space: leave it for the caller */
			un_next();
			return;
		}
}
/* The lexer: read the next token of the program.
   lvalp: receives the semantic value of the token where there is one
   (number, string, symbol table entry, field, ...).
   Returns the token code, or -1 when MAWK->do_exit is set. Most tokens are
   returned through the ct_ret() macro (defined elsewhere; presumably it
   records the token in MAWK->current_token the way the explicit
   "return MAWK->current_token = ..." statements below do - TODO confirm).
   For tokens that have no fixed text in token_str[] the text is left in
   string_buff so that Mawk_error() can print it. */
int Mawk_lex(YYSTYPE *lvalp, mawk_state_t * MAWK)
{
	register int c;

	if (MAWK->do_exit)
		return -1;

	MAWK->token_lineno = MAWK->lineno;
	MAWK->lvalp = lvalp;

reswitch:

	switch (MAWK->scan_code[c = next(MAWK)]) {
	case 0:
		ct_ret(EOF);

	case SC_SPACE:
		goto reswitch;

	case SC_COMMENT:
		eat_comment(MAWK);
		goto reswitch;

	case SC_NL:
		MAWK->lineno++;
		mawk_eat_nl(MAWK, lvalp);
		ct_ret(NL);

	case SC_ESCAPE:
		/* backslash: only valid as a line continuation */
		while (MAWK->scan_code[c = next(MAWK)] == SC_SPACE);
		if (c == '\n') {
			MAWK->token_lineno = ++MAWK->lineno;
			goto reswitch;
		}
		if (c == 0)
			ct_ret(EOF);
		un_next();
		lvalp->ival = '\\';
		ct_ret(UNEXPECTED);

	case SC_SEMI_COLON:
		mawk_eat_nl(MAWK, lvalp);
		ct_ret(SEMI_COLON);

	case SC_LBRACE:
		mawk_eat_nl(MAWK, lvalp);
		MAWK->brace_cnt++;
		ct_ret(LBRACE);

	case SC_PLUS:
		switch (next(MAWK)) {
		case '+':
			lvalp->ival = '+';
			string_buff[0] = string_buff[1] = '+';
			string_buff[2] = 0;
			ct_ret(INC_or_DEC);

		case '=':
			ct_ret(ADD_ASG);

		default:
			un_next();
			ct_ret(PLUS);
		}

	case SC_MINUS:
		switch (next(MAWK)) {
		case '-':
			lvalp->ival = '-';
			string_buff[0] = string_buff[1] = '-';
			string_buff[2] = 0;
			ct_ret(INC_or_DEC);

		case '=':
			ct_ret(SUB_ASG);

		default:
			un_next();
			ct_ret(MINUS);
		}

	case SC_COMMA:
		mawk_eat_nl(MAWK, lvalp);
		ct_ret(COMMA);

	case SC_MUL:
		mawk_test1_ret('=', MUL_ASG, MUL);

	case SC_DIV:
		{
			/* '/' is a division operator only after a token that can end an
			   operand; otherwise it starts a regular expression */
			static const int can_precede_div[] = { DOUBLE, STRING_, RPAREN, ID, D_ID, RE, RBOX, FIELD,
				GETLINE, INC_or_DEC, -1
			};

			const int *p = can_precede_div;

			do {
				if (*p == MAWK->current_token) {
					if (*p != INC_or_DEC) {
						mawk_test1_ret('=', DIV_ASG, DIV);
					}

					/* NOTE(review): after INC_or_DEC, when the next character is
					   not '=', it has been consumed and is not pushed back before
					   collect_RE() runs below - confirm this is intended */
					if (next(MAWK) == '=') {
						un_next();
						ct_ret(collect_RE(MAWK, lvalp));
					}
				}
			}
			while (*++p != -1);

			ct_ret(collect_RE(MAWK, lvalp));
		}

	case SC_MOD:
		mawk_test1_ret('=', MOD_ASG, MOD);

	case SC_POW:
		mawk_test1_ret('=', POW_ASG, POW);

	case SC_LPAREN:
		MAWK->paren_cnt++;
		ct_ret(LPAREN);

	case SC_RPAREN:
		if (--MAWK->paren_cnt < 0) {
			mawk_compile_error(MAWK, "extra ')'");
			MAWK->paren_cnt = 0;
			goto reswitch;
		}

		ct_ret(RPAREN);

	case SC_LBOX:
		ct_ret(LBOX);

	case SC_RBOX:
		ct_ret(RBOX);

	case SC_MATCH:
		/* keep the operator text for error messages; the terminator goes
		   into [1] (it used to overwrite the '~' in [0]) */
		string_buff[0] = '~';
		string_buff[1] = 0;
		lvalp->ival = 1;
		ct_ret(MATCH);

	case SC_EQUAL:
		mawk_test1_ret('=', EQ, ASSIGN);

	case SC_NOT:									/* !  */
		if ((c = next(MAWK)) == '~') {
			string_buff[0] = '!';
			string_buff[1] = '~';
			string_buff[2] = 0;
			lvalp->ival = 0;
			ct_ret(MATCH);
		}
		else if (c == '=')
			ct_ret(NEQ);

		un_next();
		ct_ret(NOT);


	case SC_LT:									/* '<' */
		if (next(MAWK) == '=')
			ct_ret(LTE);
		else
			un_next();

		/* after getline a '<' is an input redirection, not a comparison */
		if (MAWK->getline_flag) {
			MAWK->getline_flag = 0;
			ct_ret(IO_IN);
		}
		else
			ct_ret(LT);

	case SC_GT:									/* '>' */
		/* in a print statement, outside of parentheses, '>' is an output
		   redirection */
		if (MAWK->print_flag && MAWK->paren_cnt == 0) {
			MAWK->print_flag = 0;
			/* there are 3 types of IO_OUT
			   -- build the error string in string_buff */
			string_buff[0] = '>';
			if (next(MAWK) == '>') {
				lvalp->ival = F_APPEND;
				string_buff[1] = '>';
				string_buff[2] = 0;
			}
			else {
				un_next();
				lvalp->ival = F_TRUNC;
				string_buff[1] = 0;
			}
			return MAWK->current_token = IO_OUT;
		}

		mawk_test1_ret('=', GTE, GT);

	case SC_OR:
		if (next(MAWK) == '|') {
			mawk_eat_nl(MAWK, lvalp);
			ct_ret(OR);
		}
		else {
			un_next();

			if (MAWK->print_flag && MAWK->paren_cnt == 0) {
				MAWK->print_flag = 0;
				lvalp->ival = PIPE_OUT;
				string_buff[0] = '|';
				string_buff[1] = 0;
				ct_ret(IO_OUT);
			}
			else
				ct_ret(PIPE);
		}

	case SC_AND:
		if (next(MAWK) == '&') {
			mawk_eat_nl(MAWK, lvalp);
			ct_ret(AND);
		}
		else {
			/* a single '&' is not an operator */
			un_next();
			lvalp->ival = '&';
			ct_ret(UNEXPECTED);
		}

	case SC_QMARK:
		ct_ret(QMARK);

	case SC_COLON:
		ct_ret(COLON);

	case SC_RBRACE:
		if (--MAWK->brace_cnt < 0) {
			mawk_compile_error(MAWK, "extra '}'");
			eat_semi_colon(MAWK);
			MAWK->brace_cnt = 0;
			goto reswitch;
		}

		if ((c = MAWK->current_token) == NL || c == SEMI_COLON || c == SC_FAKE_SEMI_COLON || c == RBRACE) {
			/* if the brace_cnt is zero , we've completed
			   a pattern action block. If the user insists
			   on adding a semi-colon on the same line
			   we will eat it.  Note what we do below:
			   physical law -- conservation of semi-colons */

			if (MAWK->brace_cnt == 0)
				eat_semi_colon(MAWK);
			mawk_eat_nl(MAWK, lvalp);
			ct_ret(RBRACE);
		}

		/* supply missing semi-colon to statement that
		   precedes a '}' */
		/* (the '}' is pushed back and brace_cnt restored, so it is seen again
		   on the next call) */
		MAWK->brace_cnt++;
		un_next();
		MAWK->current_token = SC_FAKE_SEMI_COLON;
		return SEMI_COLON;

	case SC_DIGIT:
	case SC_DOT:
		{
			mawk_num_t d;
			int flag;
			/* shared constants: 0 and 1 do not need an allocation */
			static const mawk_num_t double_zero = MAWK_NUM_ZERO;
			static const mawk_num_t double_one = MAWK_NUM_ONE;

			if ((d = collect_decimal(MAWK, lvalp, c, &flag)) == MAWK_NUM_ZERO) {
				if (flag)
					ct_ret(flag);
				else
					lvalp->ptr = (PTR) & double_zero;
			}
			else if (d == 1.0) {
				lvalp->ptr = (PTR) & double_one;
			}
			else {
				lvalp->ptr = (PTR) MAWK_ZMALLOC(MAWK, mawk_num_t);
				*(mawk_num_t *) lvalp->ptr = d;
			}
			ct_ret(DOUBLE);
		}

	case SC_DOLLAR:							/* '$' */
		{
			mawk_num_t d;
			int flag;

			while (MAWK->scan_code[c = next(MAWK)] == SC_SPACE);
			if (MAWK->scan_code[c] != SC_DIGIT && MAWK->scan_code[c] != SC_DOT) {
				un_next();
				ct_ret(DOLLAR);
			}

			/* compute field address at compile time */
			if ((d = collect_decimal(MAWK, lvalp, c, &flag)) == 0.0) {
				if (flag)
					ct_ret(flag);					/* an error */
				else
					lvalp->cp = &MAWK->field[0];
			}
			else {
				if (d > MAX_FIELD) {
					mawk_compile_error(MAWK, "$%g exceeds maximum field(%d)", d, MAX_FIELD);
					d = MAX_FIELD;
				}
				lvalp->cp = field_ptr((int) d);
			}

			ct_ret(FIELD);
		}

	case SC_DQUOTE:
		return MAWK->current_token = collect_string(MAWK, lvalp);

	case SC_IDCHAR:							/* collect an identifier */
		{
			unsigned char *p = (unsigned char *) string_buff + 1;
			SYMTAB *stp;

			string_buff[0] = c;

			/* NOTE(review): no bound on the identifier length is checked here;
			   confirm string_buff is large enough for any input */
			while ((c = MAWK->scan_code[*p++ = next(MAWK)]) == SC_IDCHAR || c == SC_DIGIT);

			un_next();
			*--p = 0;

			switch ((stp = mawk_find(MAWK, string_buff, 1))->type) {
			case ST_NONE:
				/* check for function call before defined */
				if (next(MAWK) == '(') {
					stp->type = ST_FUNCT;
					stp->stval.fbp = (FBLOCK *)
						mawk_zmalloc(MAWK, sizeof(FBLOCK));
					stp->stval.fbp->name = stp->name;
					stp->stval.fbp->code = (INST *) 0;
					lvalp->fbp = stp->stval.fbp;
					MAWK->current_token = FUNCT_ID;
				}
				else {
					lvalp->stp = stp;
					MAWK->current_token = MAWK->current_token == DOLLAR ? D_ID : ID;
				}
				un_next();
				break;

			case ST_NR:
				MAWK->NR_flag = 1;
				stp->type = ST_VAR;
				/* fall thru */

			case ST_VAR:
			case ST_ARRAY:
			case ST_LOCAL_NONE:
			case ST_LOCAL_VAR:
			case ST_LOCAL_ARRAY:

				lvalp->stp = stp;
				MAWK->current_token = MAWK->current_token == DOLLAR ? D_ID : ID;
				break;

			case ST_FUNCT:
				lvalp->fbp = stp->stval.fbp;
				MAWK->current_token = FUNCT_ID;
				break;

			case ST_C_FUNCTION:
				/* each reference gets its own FBLOCK, linked into MAWK->c_funcs */
				/* NOTE(review): the calloc() result is not checked */
				lvalp->fbp = calloc(sizeof(FBLOCK), 1);
				lvalp->fbp->name = mawk_strdup_(string_buff);
				lvalp->fbp->code = NULL;
				lvalp->fbp->c_next = MAWK->c_funcs;
				MAWK->c_funcs = lvalp->fbp;
				MAWK->current_token = C_FUNCT_ID;
				break;

			case ST_KEYWORD:
				MAWK->current_token = stp->stval.kw;
				break;

			case ST_BUILTIN:
				lvalp->bip = stp->stval.bip;
				MAWK->current_token = BUILTIN;
				break;

			case ST_LENGTH:

				lvalp->bip = stp->stval.bip;

				/* check for length alone, this is an ugly
				   hack */
				while (MAWK->scan_code[c = next(MAWK)] == SC_SPACE);
				un_next();

				MAWK->current_token = c == '(' ? BUILTIN : LENGTH;
				break;

			case ST_FIELD:
				lvalp->cp = stp->stval.cp;
				MAWK->current_token = FIELD;
				break;

			default:
				fprintf(stderr, "stp type:%d\n", stp->type);
				mawk_bozo(MAWK, "mawk_find returned bad st type");
			}
			return MAWK->current_token;
		}


	case SC_UNEXPECTED:
		lvalp->ival = c & 0xff;
		ct_ret(UNEXPECTED);
	}
	return 0;											/* never get here make lint happy */
}
/* Collect a decimal constant in string_buff and convert it.
   c: the first character of the constant (a digit or '.'), already read.
   flag: set to 0 on success, to UNEXPECTED when a '.' is not followed by a
   digit (lvalp->ival is set to '.'), or to BAD_DECIMAL when the exponent is
   malformed or the conversion did not consume the whole text.
   Returns the value; zero when *flag is set. Overflow and underflow are
   reported as compile errors. */
static mawk_num_t collect_decimal(mawk_state_t *MAWK, YYSTYPE *lvalp, int c, int *flag)
{
	register unsigned char *p = (unsigned char *) string_buff + 1;
	unsigned char *endp;
	mawk_num_t d;

	*flag = 0;
	string_buff[0] = c;

	if (c == '.') {
		if (MAWK->scan_code[*p++ = next(MAWK)] != SC_DIGIT) {
			*flag = UNEXPECTED;
			lvalp->ival = '.';
			return MAWK_NUM_ZERO;
		}
	}
	else {
		/* integer part; stop after the decimal point if there is one */
		while (MAWK->scan_code[*p++ = next(MAWK)] == SC_DIGIT);
		if (p[-1] != '.') {
			un_next();
			p--;
		}
	}
	/* get rest of digits after decimal point */
	while (MAWK->scan_code[*p++ = next(MAWK)] == SC_DIGIT);

	/* check for exponent */
	if (p[-1] != 'e' && p[-1] != 'E') {
		un_next();
		*--p = 0;
	}
	else {												/* get the exponent */
		if (MAWK->scan_code[*p = next(MAWK)] != SC_DIGIT && *p != '-' && *p != '+') {
			/* terminate after the offending character so that the error
			   message can show it */
			*++p = 0;
			*flag = BAD_DECIMAL;
			return MAWK_NUM_ZERO;
		}
		else {											/* get the rest of the exponent */
			p++;
			while (MAWK->scan_code[*p++ = next(MAWK)] == SC_DIGIT);
			un_next();
			*--p = 0;
		}
	}

	errno = 0;										/* check for overflow/underflow */
	d = strtonum(string_buff, (char **) &endp);

#ifndef	 STRTOD_UNDERFLOW_ON_ZERO_BUG
	if (errno)
		mawk_compile_error(MAWK, "%s : decimal %sflow", string_buff, d == 0.0 ? "under" : "over");
#else	/* ! sun4 bug */
	if (errno && d != 0.0)
		mawk_compile_error(MAWK, "%s : decimal overflow", string_buff);
#endif

	/* the conversion must have consumed everything that was collected */
	if (endp < p) {
		*flag = BAD_DECIMAL;
		return 0.0;
	}
	return d;
}
/*---------- process escape characters ---------------*/
/* Collect a string constant in string_buff; the opening double quote has
   already been read. On success lvalp->ptr is set to a new string object
   (escape sequences processed by mawk_rm_escape() if any were seen) and
   STRING_ is returned. A newline or the end of the input before the closing
   quote is reported as a runaway string constant.
   NOTE(review): no bound on the string length is checked here; confirm
   string_buff is large enough for any input. */
static int collect_string(mawk_state_t * MAWK, YYSTYPE *lvalp)
{
register unsigned char *p = (unsigned char *) string_buff;
int c;
int e_flag = 0; /* on if have an escape char */
while (1)
switch (MAWK->scan_code[*p++ = next(MAWK)]) {
case SC_DQUOTE: /* done */
/* replace the closing quote with the terminator */
*--p = 0;
goto out;
case SC_NL:
/* terminate the text at the newline for the error message */
p[-1] = 0;
/* fall thru */
case 0: /* unterminated string */
/* NOTE(review): the format consumes only string_buff; the token_lineno
   argument is not used by it */
mawk_compile_error(MAWK, "runaway string constant \"%.10s ...", string_buff, MAWK->token_lineno);
/* presumably mawk_exitval() returns from this function; otherwise control
   would fall into the next case - TODO confirm */
mawk_exitval(MAWK, 2, -1);
case SC_ESCAPE:
if ((c = next(MAWK)) == '\n') {
/* backslash-newline is a line continuation: drop the backslash */
p--;
MAWK->lineno++;
}
else if (c == 0)
un_next();
else {
/* keep backslash and character; they are translated after the loop */
*p++ = c;
e_flag = 1;
}
break;
default:
break;
}
out:
lvalp->ptr = (PTR) mawk_new_STRING(MAWK, e_flag ? mawk_rm_escape(MAWK, string_buff)
: string_buff);
return STRING_;
}
/* Collect a regular expression in string_buff and compile it; the opening
   '/' has already been read. On success lvalp->ptr is set to the result of
   mawk_re_compile() and RE is returned. A newline or the end of the input
   before the closing '/' is reported as a runaway regular expression.
   NOTE(review): no bound on the length is checked here; confirm string_buff
   is large enough for any input. */
static int collect_RE(mawk_state_t * MAWK, YYSTYPE *lvalp)
{
	register unsigned char *p = (unsigned char *) string_buff;
	int c;
	mawk_string_t *sval;

	while (1)
		switch (MAWK->scan_code[*p++ = next(MAWK)]) {
		case SC_DIV:								/* done */
			/* replace the closing '/' with the terminator */
			*--p = 0;
			goto out;

		case SC_NL:
			/* terminate the text at the newline for the error message */
			p[-1] = 0;
			/* fall thru */

		case 0:										/* unterminated re */
			mawk_compile_error(MAWK, "runaway regular expression /%.10s ...", string_buff, MAWK->token_lineno);
			mawk_exitval(MAWK, 2, -1);

		case SC_ESCAPE:
			switch (c = next(MAWK)) {
			case '/':
				/* an escaped '/' is stored as a plain '/' */
				p[-1] = '/';
				break;

			case '\n':
				/* backslash-newline is a line continuation: drop the backslash
				   and count the line, as collect_string() does */
				p--;
				MAWK->lineno++;
				break;

			case 0:
				un_next();
				break;

			default:
				/* keep backslash and character for the regex compiler */
				*p++ = c;
				break;
			}
			break;
		}

out:
	/* now we've got the RE, so compile it */
	sval = mawk_new_STRING(MAWK, string_buff);
	lvalp->ptr = mawk_re_compile(MAWK, sval);
	free_STRING(sval);
	return RE;
}
/* Save the current parse state (MAWK->ps) on the parser stack and start
   with a zeroed state. The stack grows 8 entries at a time. If the stack
   can not be grown, an error is reported, mawk_exit() is called and the
   current state is left untouched. */
void mawk_parser_push(mawk_state_t * MAWK)
{
	if (MAWK->pstack_used >= MAWK->pstack_alloced) {
		/* keep the old pointer until realloc() is known to have succeeded */
		void *grown = realloc(MAWK->mawk_parser_stack, sizeof(mawk_parse_state_t) * (MAWK->pstack_alloced + 8));

		if (grown == NULL) {
			mawk_errmsg(MAWK, 0, "out of memory while growing the parser stack");
			mawk_exit(MAWK, 2);
			return;
		}
		MAWK->mawk_parser_stack = grown;
		MAWK->pstack_alloced += 8;
	}
	memcpy(&MAWK->mawk_parser_stack[MAWK->pstack_used], &MAWK->ps, sizeof(mawk_parse_state_t));
	MAWK->pstack_used++;
	memset(&MAWK->ps, 0, sizeof(mawk_parse_state_t));
}
/* Drop the current parse state and restore the most recently pushed one.
   Returns 0 (and changes nothing) if the parser stack is empty, 1 after a
   state has been restored. The stack storage is released when the last
   entry is popped. */
int mawk_parser_pop(mawk_state_t * MAWK)
{
	int top;

	if (MAWK->pstack_used > 0) {
		/* the read buffer of the state being dropped is not needed anymore */
		mawk_zfree(MAWK, MAWK->ps.buffer, BUFFSZ + 1);
		MAWK->ps.buffer = NULL;
		MAWK->ps.buffp = NULL;

		top = --MAWK->pstack_used;
		memcpy(&MAWK->ps, &MAWK->mawk_parser_stack[top], sizeof(mawk_parse_state_t));

		if (top == 0) {
			free(MAWK->mawk_parser_stack);
			MAWK->mawk_parser_stack = NULL;
			MAWK->pstack_alloced = 0;
		}
		return 1;
	}
	return 0;
}
/* error handling */

/* Print an "unexpected character" message for the character stored in
   lvalp->ival to stderr, prefixed with the program name and the line number
   of the current token. Characters between ' ' and 127 (exclusive) are
   printed literally, everything else in hex. */
void mawk_unexpected_char(mawk_state_t * MAWK, YYSTYPE *lvalp)
{
	int ch = lvalp->ival;
	int printable = (ch > ' ' && ch < 127);

	fprintf(stderr, "%s: %u: ", MAWK->progname, MAWK->token_lineno);

	if (!printable)
		fprintf(stderr, "unexpected character 0x%02x\n", ch);
	else
		fprintf(stderr, "unexpected character '%c'\n", ch);
}
/* reports a missing closing character; defined at the end of the file */
static void missing(mawk_state_t *, int, const char *, int);
/* Display text of the tokens, used by Mawk_error() in syntax error
   messages. A NULL str means the text of the token is taken from
   string_buff. The table is searched up to the first entry whose token
   is 0. */
static const struct token_str {
short token;
char *str;
} token_str[] = { /* read-only */
{EOF, "end of file"},
{NL, "end of line"},
{SEMI_COLON, ";"},
{LBRACE, "{"},
{RBRACE, "}"},
{SC_FAKE_SEMI_COLON, "}"},
{LPAREN, "("},
{RPAREN, ")"},
{LBOX, "["},
{RBOX, "]"},
{QMARK, "?"},
{COLON, ":"},
{OR, "||"},
{AND, "&&"},
{ASSIGN, "="},
{ADD_ASG, "+="},
{SUB_ASG, "-="},
{MUL_ASG, "*="},
{DIV_ASG, "/="},
{MOD_ASG, "%="},
{POW_ASG, "^="},
{EQ, "=="},
{NEQ, "!="},
{LT, "<"},
{LTE, "<="},
{GT, ">"},
{GTE, ">="},
{MATCH, NULL},
{PLUS, "+"},
{MINUS, "-"},
{MUL, "*"},
{DIV, "/"},
{MOD, "%"},
{POW, "^"},
{NOT, "!"},
{COMMA, ","},
{INC_or_DEC, NULL},
{DOUBLE, NULL},
{STRING_, NULL},
{ID, NULL},
{FUNCT_ID, NULL},
{BUILTIN, NULL},
{IO_OUT, NULL},
{IO_IN, "<"},
{PIPE, "|"},
{DOLLAR, "$"},
{FIELD, "$"},
{0, NULL}
};
/* if paren_cnt >0 and we see one of these, we are missing a ')' */
/* (the list ends at the first 0 entry) */
static const int missing_rparen[] = /* read-only */
{ EOF, NL, SEMI_COLON, SC_FAKE_SEMI_COLON, RBRACE, 0 };
/* ditto for '}' */
static const int missing_rbrace[] = /* read-only */
{ EOF, BEGIN, END, 0 };
/* return non-zero if tok is in the zero-terminated token list */
static int token_listed(const int *list, int tok)
{
	for (; *list; list++)
		if (*list == tok)
			return 1;
	return 0;
}

/* Report a syntax error at the current token. s_unused is ignored; the
   message is built from MAWK->current_token: a missing ')' or '}' is
   reported when the open paren/brace count and the token suggest it,
   otherwise a "syntax error" message naming the token where its text is
   known. The compile error counter is incremented here only for the
   missing-closer and unexpected-character reports; reaching
   MAX_COMPILE_ERRORS calls mawk_exit(). */
void Mawk_error(mawk_state_t *MAWK, char *s_unused)
{
	const struct token_str *entry = token_str;
	const char *text = NULL;

	/* look up the text of the token; NULL in the table means string_buff */
	while (entry->token) {
		if (MAWK->current_token == entry->token) {
			text = (entry->str != NULL) ? entry->str : string_buff;
			break;
		}
		entry++;
	}

	if (text == NULL)							/* search the keywords */
		text = mawk_find_kw_str(MAWK->current_token);

	if (text == NULL) {
		/* special cases */
		switch (MAWK->current_token) {
		case UNEXPECTED:
			mawk_unexpected_char(MAWK, (YYSTYPE *)MAWK->lvalp);
			goto done;

		case BAD_DECIMAL:
			mawk_compile_error(MAWK, "syntax error in decimal constant %s", string_buff);
			break;

		case RE:
			mawk_compile_error(MAWK, "syntax error at or near /%s/", string_buff);
			break;

		default:
			mawk_compile_error(MAWK, "syntax error");
			break;
		}
		return;
	}

	if (MAWK->paren_cnt && token_listed(missing_rparen, MAWK->current_token)) {
		missing(MAWK, ')', text, MAWK->token_lineno);
		MAWK->paren_cnt = 0;
		goto done;
	}

	if (MAWK->brace_cnt && token_listed(missing_rbrace, MAWK->current_token)) {
		missing(MAWK, CL_BRACE, text, MAWK->token_lineno);
		MAWK->brace_cnt = 0;
		goto done;
	}

	mawk_compile_error(MAWK, "syntax error at or near %s", text);
	return;

done:
	if (++(MAWK->compile_error_count) == MAX_COMPILE_ERRORS)
		mawk_exit(MAWK, 2);
}
/* Report that character c is missing near the token text n on line ln.
   The message is prefixed with the script name when one is set. */
static void missing(mawk_state_t *MAWK, int c, const char *n, int ln)
{
	const char *fname = "";
	const char *sep = "";

	if (MAWK->ps.pfile_name) {
		fname = MAWK->ps.pfile_name;
		sep = ": ";
	}

	mawk_errmsg(MAWK, 0, "%s%sline %u: missing %c near %s", fname, sep, ln, c, n);
}