diff --git a/Changelog b/Changelog new file mode 100644 index 0000000..d3752d3 --- /dev/null +++ b/Changelog @@ -0,0 +1,283 @@ +Changelog for libmawk +~~~~~~~~~~~~~~~~~~~~~ +libmawk 1.0.0 (released: 2018-11-11, r1276) + [API] -Change: resume capable function call entries + [build] -Fix: make clean should remove custom C test objects and executables + [build] -Fix: incomplete install headers list + [build] -Add: Makefile variable for selecting zmalloc alternatives + [build] -Add: optional DESTDIR for debian + [build] -Add: --prefix, --debug, --symbols + [build] -Add: help + [build] -Chnage: upgrade to tmpasm and latest version of scconfig + [build] -Add: optional support for soname + [build] -Change: use $(MAKE) instead of make in central Makefiles for parallel build + [build] -Add: detect -rdynamic + [core] -Change: extend mpow2 for covering new celltypes + [core] -Cleanup: rename _STOP and _RANGE to _RANGE_STOP and _RANGE_CHK to make their purpose more clear + [core] -Fix: return read error in getline doesn't fall in infinite loop in fill buff anymore + [core] -Add: MAWK_MEM_PEDANTIC compile time option; when set, try to free all allocation instead of letting zmalloc catch them all + [core] -Fix: don't free the hash before freeing argv and others + [core] -Fix: make sure code blocks are free'd only once, but are really free'd + [core] -Fix: memory leak when initializing argv + [core] -Fix: let mawk_delete() free arrays, don't manually do that on argv + [core] -Change: let mawk_delete() free user function + [core] -Change: fin is under files - higher level code should do all access through the file infra + [core] -Fix: FIN is only about buffering, no open/close should go through that API + [core] -Fix: reference count vf structs since one vf might be open multiple times from FILE_NODEs (for input and output) and should be destroyed only once + [core] -Fix: closing all files, not only output files, is not a pedantic-mem feature - not doig it could lead to leaking 
fds and other resources even if zmalloc pools are freed centrally + [core] -Fix: there is only one way files are closed, only one code is needed + [core] -Fix: mawk_fill_buff returns a signed long for indicating temporary error (fixed for 64 bit) + [core] -Fix: rename state field userdata to func_userdata to make the purpose more clear + [core] -Add: ctx_userdata intended for the host app for storing whatever data, per context + [core] -Add: have a field in the state struct for the C function being called back so that subsequent execute()s may know about that + [core] -Fix: memory leak: free data of c function calls compiled into the code at uninit + [core] -Fix: C89 fixes (func-data ptr conv through union, don't use strcasemp(), strdup, snprintf, //, vararg macros) + [core] -Fix: wrong arg passed to isnan - cell instead fo double value + [core] -Fix: vio fclose survives if there's no close function available (double close?) + [doc] -Add: short desc about the vm + [doc] -Add: explain how execution/resume works + [doc] -Add: describe deep recursion + [doc] -Add: minimalistic install + [example_apps] -Add: blocking fifo test also tests bifunct getline in an user function + [example_apps] -Add: make sure to demonstrate how the code is suspended even in user functions + [example_apps] -Update: 15_call for the new function call API + [example_apps] -Add: example on run limit resume + [example_apps] -Add: central Makefile that runs all examples + [example_apps] -Add: call a c function from c using awk's func call (demonstrate that awk and c functions should not make any difference) + [example_apps] -Fix: hash_re won't init std pipes twice + [example_apps] -Fix: custom vio test sets up pipes correctly and closes the hass from the app's end of the deal at the end + [example_apps] -Add: better explanation of the c_func_call example and hook it in the all-test-chain + [libmawk] -Fix: C function call from execute with new function call conventions + [libmawk] -Fix: 
call_functionp increases sp before push else the stack pointer points to the wrong place + [libmawk] -Fix: restore sp on function call failure + [libmawk] -Fix: before closing stdio on awk side, close stdin on app side to make sure both ends can be closed + [libmawk] -Fix: do not leak function arguments already pushed on stack if an awk function call fails due to bad arguments + [libmawk] -Fix: allow c functions to be called using mawk_call_function*() + [libmawk] -Fix: don't close stdin if it's not open + [libmawk] -Fix: don't use strdup() for c function reg + [libmawk] -Fix: don't close non-fifo stdin as fifo + [libmawk] -Fix: priting uninitialized cell should work and should result in empty string + [libmawk] -Add: wants_to_exit flag so that a libmawk caller knows a script wants to exit + [libmawk] -Add: function call API with cell argc/argv + [parser] -Fix: really reset the parser stack when it gets freed - used to segfault for two includes on the topmost level + [parser] -Fix: allow excess newlines in the main context - this fixes a corner case around multiple includes + [regression] -Add: test for bifunct call() + [regression] -Add: test for acall() (dynamic call with array as argument) + [regression] -Add: test for stack grow on deep recursion + [regression] -Add: test program for awk function call corner cases + [regression] -Add: libmawk corner case test base + [regression] -Fix: libmawk func call test diag messages are more clear on what's wrong or good + [rexp] -Fix: infinite loop bug (gawk test noloop1 and noloop2) - contributed by Thomas E. 
Dickey as a mawk fix + [vio] -Fix: set eof flags on fifo creation (fixes a conditional jump on uninited var bug) + [vio] -Fix: closing FIN closes the vio layer even if fin buffer had not been initialized + [vio] -Fix: close vf throug FIN to make sure buffers are also freed + [vio] -Fix: simpler common header with refco and who-knows-what in the future + [vio] -Fix: initialize refco to 0 + [vm] -Change: avoid recursive calls to execute() on plain hardwired awk function calls + [vm] -Change: wants_to_ret is execution state instead - need to remember whether we are running range1 or range2 + [vm] -Fix: leak in function return value stack handling + [vm] -Add: new convention in bi_funct calling: return stack may be a directive for executing an user functioin (for bi_call and bi_acall) + [vm] -Fix: mawk_bi_call() and bifunct acall() follow the new no-recurse calling convention + [vm] -Add: new low level c->awk call functions trying to follow the no-recurse execute_() conventions + [vm] -Move: execute API into new execute.h + [vm] -Add: a separate execution exit status for exit() + [vm] -Add: execution run limit and resume + [vm] -Fix: bi_funct getline can indicate 'nomore' instead of eof + [vm] -Fix: properly save and restore old_stack_base and old_sp on stack danger (stack growth) + [vm] -Cleanup: remumber types (continous numbering) + [vm] -Add: more stack-related cell types and make room for future non-string additions + [vm] -Fix: compare() returns double so that nan-checks can be performed + [vm] -Fix: error return with value from execute() + [vm] -Fix: make MAWK->pow big enough and make it safe (need to be unsigned long for these values and for the bit ops) + + +libmawk 0.10.1 (released: 2014-07-11, r1056) + [compiler] -Fix: zero-offset bug in jmp push (xref test crash) + [build] -Add: make test should run the standard regression tests; do not run tests automatically on build + [core] -Add: zfifo: generic fifo implemented using zmalloc/zfree + [core] -Add: 
mawk_file_find() has a create flag that can disable creating a new file node - useful for forcing existing/open files + [core] -Cleanup: move fin struct to fin.h + [core] -Cleanup: rename FIN to mawk_input_t for naming consistency + [core] -Fix: rewritten mawk_FINgets - cleaner code that is non-blocking-safe + [core] -Fix: zrealloc() should not crash if old ptr is NULL + [core] -Fix: safer vf close, close outputs only in uninit, when EXIT has finished for sure + [vio] -Fix: input is always fd, output is always FILE *; remove fgets and stdin related API + [vio] -Clenaup: cleaner ways recognizing "/dev/std*" + [vio] -Add: support for /dev/fd/xxx (fdopen() wrapper) + [vio] -Add: an extra layer to dispatch vio calls - multiple vio implementations can coexist + [vio] -Add: virtualize vio init (non-per-file vio hooks); use MAWK->vio_init instead of hardwired vio_orig (so that vio_orig can be omitted) + [vio] -Del: setbuf from API - this callback was a hack, now all done by the call that sets up stdio + [vio] -Change: replace is_stdouts with a more generic mark_no_close hook + [vio] -Fix: use zmalloc/zfree for the mawk_vio_t allocations + [vio] -Add: a generic fifo implementation that also replaces the artificial FIN buf + [regression] -Cleanup: remove test/ and convert all shell script based tests to make-based tests in regression/ + [tools] -Fix: find_globals uses the right dir + [tools] -Fix: sym prefix validator won't complain for main() + [API] -Add: file name validation hook + [API] -Add: call to register a file by name:type and vio handle in the high level list of files; also sets up input buffering as needed + [API] -Add: call to close the default input fifo (sending eof to the script) + [API] -Add: convenient stdio-init call for multi-staged init + [example_app] -Add: custom output pipe hack examples and update pipe in buffer fill examples with all possible corner cases + [example_app] -Add: file name validation and vio hook examples + + +libmawk 0.10.0 
(released: 2014-06-26, r937) + Major changes in core, mostly for virtual arrays: + [array] -Add: introduce new VM instructions for all array-element-write operations and split the lvalue rule of the grammar; besides being new instructions, they do the same in the execution switch as before + [array] -Add: virtualize array operations + [array] -Add: virtual, per instance ENVIRON[] that copies environ[] upon the first access and affects exec() + [array] -Del: ST_ENV: ENVIRON[] is not a global special case anymore, just a builtin array with a different implementation + [core] -Fix: replace "short int type" and a bunch of C_ macros for cell type with a proper enum + [core] -Add: macro option for cell operators for speed + [core] -Fix: set ERRNO in fin after a read error + [core] -Cleanup: mawk_ prefix zmalloc, zfree, ZMALLOC, ZFREE and a lot of other constants and macros + [init] -Add: a flag for indicating that it is ok for initialize_argv() to end up without a script + [dump] -Fix: print the code of illegal instructions + [dump] -Add: da_text: use blank lines to separate blocks: functions, BEGIN, MAIN, END + [dump] -Change: da_text: after user functions, dump code in BEGIN-MAIN-END order - it is more intuitive this way + [dump] -Add: text dump includes a summary of the global symbol table + [dump] -Change: introduce -W dumpsym for dumping symbols independently of dumping code + [da_bin] -Fix: properly save/load C_SNULL and C_SPACE (special split argument types) to/from binary dump + API CHANGES affecting libmawk: + [core] -Clenaup: rename STRING to mawk_string_t + [core] -Clenaup: rename CELL to libmawk_cell_t + [core] -Change: use an union instead of dval in CELL, to make room for other data types + [core] -Add: new cell type C_ARR_REF (should be execute()-internal); an array reference by name and index (instead of pointer to a cell) + [API] -move parsing the script from init stage1 to stage2 to give the host app a chance to inject things into the context before any 
code is parsed + [API] -Change: get_var should return a read-only CELL - direct modification of the cell is a bad idea due to possible side effects required at write + [API] -Fix: libmawk_print_cell shall be called with the context (mawk_state_t) as any other call in the API + [API] -Change: new, easier-to-use c-func calling conventions and helpers + [array] -Cleanup: rename ARRAY to mawk_array_t + New libmawk API features: + [API] -Add: helper functions to return the numeric value of a cell + [API] -Add: an easy way to set a cell to empty + [API] -Add: high level array set_at calls and scalar set calls + +libmawk 0.9.7 (released: 2014-05-17, r732) + Major code structure changes: + [vio] -Split: file/pipe open/close operations to virtual IO (vio) + [linux] -Add: initial linux kernel module effort (defunct yet) + [array] -Add: virtualized array access (except for write) with callback functions + [libmawk] -Split: lmawk-comp and lmawk-exec frames - most code is in common yet + [libmawk] -Add: MAWK_NO_FORK for configuring against system() and fork() + Code cleanup: + [da_bin] -Cleanup: compiler warnings around da_bin (binary script save/load) + [da_bin] -Split: da_bin and da_bin_helper: precompiled binary script load doesn't directly call read() but an user provided callback for virtualization + [da_bin] -Fix: array creation on binary load + [da_bin] -Fix: when resetting code base size for loading binary, use zmalloc for proper allocation size + [build] -Fix: Makefile cleanup for portability + [libmawk] -Fix: prefix NF, RS and other similar macro names with MAWK_ - they collide in the Linux kernel + [test_3rd] -Add: scripts for running optional 3rd party tests from vendor/ (for the gawk set, without much configuration or filtering) + [regex] -Change: error handling without setjmp/longjmp (return values and a bunch of checks) + [regex] -Fix: regex lib uses zmalloc memory allocation to ensure all memory is free'd after a context is destroyed + [regex] -Add: tiny test 
program to trigger almost all possible error conditions in the regex lib + [regex] -Fix: move runtime states of the regex lib in the mawk struct to make it reentrant + [regex] -Fix: bi_vars are part of the mawk struct because parallel scripts may have different values for the same variables or even modify them from script + More portable doubles (and numeric): + [numeric] -Add: use porty's math_wrap for log() to get rid of nondeterministic fpe + [numeric] -Add: use PM math protection copied from libporty + [numeric] -Add: new builtin function isnan() + [numeric] -Fix: virtualize strtod int strtonum: on a real int-only platform (e.g. Linux kernel) there won't be strtod at all + Fixes of the core functionality: + [libmawk] -Fix: make sure runtime error exit code takes over in final_error_code + [bi_func] -Fix: substr for start indices less than 1 behave similar to how substr() handles length overruns (silently truncate) - gawk has the same behavior + [bi_func] -Fix: gsub() patch for the ^ bug + [io] -Fix: redirection conditions messed up in r349 + [libmawk] -Fix: disable -v only when MAWK_NO_EXEC is set, and don't even consider MAWK_NO_COMP + [libmawk] -Fix: fin_exec should respect a MAWK exit request and not retry reading + [libmawk] -Fix: if FIN cannot be opened (invalid file name, nonexisting file), don't segfault but return error + +libmawk 0.9.6 (missed) + +libmawk 0.9.5 (released: 2012-10-13, r527) + [libmawk] -Add: memory usage accounting figures; -Wmaxmem=size sets maximum + memory allocation in bytes (optional suffixes: k and m) + [libmawk] -Add: binary save/load functionality (-Wcompile and -b) + works on 32 bit systems only (will be fixed later) + [libmawk] -Change: replace hardwired double references with generic numeric + (C_NUM, num_t) - libmawk can be configured to use int or + double as numeric format (./configure --numeric=) + [libmawk] -Cleanup: (portability tweak) const correctness in mawk_reverse_find + [libmawk] -Add: (portability tweak) 
_POSIX_SOURCE + [libmawk] -Fix: (portability tweak) explicit rules for rexp/*.o including + $(CC) command line for old fashioned make having the wrong + implicit rule + [libmawk] -Cleanup: (portability tweak) k&r style function + declarations/definitions removed from rexp lib + [libmawk] -Fix: (portability tweak) missing prototypes to avoid implicit + declaration warning + [libmawk] -Change: disable two zfree() calls that would free main code to + avoid double frees + [libmawk] -Add: compile with app cflags + [libmawk] -Cleanup: move scconfig'd -D options to conf.h from Makefile.conf + to make compiler command lines shorter + [libmawk] -Cleanup: new vars.[ch] for collecting variable-related code + [libmawk] -Fix: mawk_mm (aka free_later) mechanism memory handling errors + when realloc()'d + [doc] -Add: explain design decisions behind our two gnu-dependencies + [doc] -Add: portability table (per system: compiles out of the box, has + FPE problems, awklib test ran fine) + +libmawk 0.9.4 (released: 2010-12-26, r392) + [scconfig] -Add: detect pipe(2) and set NO_PIPE, replacing HAVE_REAL_PIPES + [scconfig] -Add: detect size_t include file (removes the ifdef mess) + [scconfig] -Add: require detection of cc/fpic (fixes compilation on amd64) + [libmawk] -Cleanup: del PROTO() macro: libmawk requires at least C89 compiler + [libmawk] -Cleanup: remove revision history and file name comments from *.[ch] + copyright notice above all mawk copyright notices + [libmawk] -Cleanup: version to clearly indicate that this is lmawk + [libmawk] -Cleanup: unified indentation in all .c and .h files + [libmawk] -Cleanup: remove a lot of compiler warnings, fix const correctness + [libmawk] -Cleanup: Makefile.dep generation rule; proper, up-to-date Makefile.dep + [libmawk] -Del: remove FAKE_PIPES support (was only for DOS anyway) + [libmawk] -Del: void * is a must, PTR should not fall back to char * + [libmawk] -Del: cut out MSDOS support with all its "dreaded segment nonsense" + [libmawk] 
-Fix: get bison to generate a reentrant parser + [libmawk] -Fix: move static globals to mawk state struct + [libmawk] -Fix: memory leaks + + +libmawk 0.9.3 (released: 2010-12-21, r307) + [libmawk] -Fix: use -fPIC for compiling (helps on amd64) + [libmawk] -Add: new builtin variable ERRNO with a different error code for each corner case in extension calls + [libmawk] -Add: new built-in variable LIBPATH for library search path + - skip loading a script if name started with '+' and the script was already loaded + - if script file name starts with '/', assume it to be an absolute path and skip LIBPATH search + [scconfig] -Add: scconfig configures/generates Makefiles of libmawk after autodetecting system features + [scconfig] -Del: config-user, v7, atarist: scconfig should take care of all supported systems + [scconfig] -Add: central ./configure and Makefile + [libmawk] -Fix: order of running BEGINs with "include" + [libmawk] -Fix: memory leaks around parser state + [awklib] -Add: three awk libraries: + - lib_rand for reproducible pseudo random numbers + - lib_array for packing/unpacking/printing arrays + - lib_ascii for converting characters to ASCII codes + + +libmawk 0.9.2 (released: 2010-12-12, r229) + [libmawk] -Fix: invalid memory handling around cells + [libmawk] -Fix: manual page dates and author and project/executable name + [libmawk] -Add: dynamic awk function calls (call() and acall()) + [libmawk] -Add: dynamic awk variable value fetch (valueof()) + [libmawk] -Add: include + +libmawk 0.9.1 (released: 2009-08-14, r198) + [API] -Add: remember userdata when registering and calling back c functions + [API] -Add: option to suppress undefined function calls + [API] -Add: new call allowing awk function calls without varargs + [API] -Change: split up libmawk_initialize in 3 stages (optional) + [libmawk] -Del: autotools, keep last generated Makefile for hand editing + [libmawk] -Del: config.h (merged in Makefile.conf.in) + [libmawk] -Rename: mawk -> lmawk (the binary) 
+ [libmawk] -Change: print cell prints integers without .000000 + [testapp] -Move: testapp out from libmawk (new dir structure) + [testapp] -Fix: stack handling bug + [doc] -Add: manual pages for libmawk calls + +libmawk 0.9.0 (released: 2009-07-22, r146) + * Initial release, based on mawk 1.3.3 + * r3...r8 Separate libmawk_call() from execute(); allow undefined functions + * r9...r42 globals to struct (reentrant) + * r43...r92 rename non-static functions to have a mawk_ prefix + * r92...r145 libmawk + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e0fc168 --- /dev/null +++ b/Makefile @@ -0,0 +1,25 @@ +all: src/libmawk/Makefile.conf + cd src && $(MAKE) + +install: src/libmawk/Makefile.conf + cd src && $(MAKE) install + cd doc && $(MAKE) install + +uninstall: + cd src && $(MAKE) uninstall + +linstall: src/libmawk/Makefile.conf + cd src && $(MAKE) linstall + +clean: + cd src && $(MAKE) clean + +distclean: + cd src && $(MAKE) distclean + cd scconfig && $(MAKE) clean + +test: + cd src && $(MAKE) test + +src/libmawk/Makefile.conf: + @echo "Please run ./configure first."; false diff --git a/README b/README new file mode 100644 index 0000000..ebb6ee0 --- /dev/null +++ b/README @@ -0,0 +1,42 @@ +1. Introduction + +Libmawk is a fork of mawk 1.3.3, restructured for embedding. This means the +user gets libmawk.h and libmawk.so and can embed awk scripting language +in any application written in C. For more information, check out the web page +at http://repo.hu/projects/libmawk and the documentation in doc/. + +2. Requirements and compiling + +ANSI C compiler, POSIX shell and make are required for compiling libmawk. +Bison should be installed for developing libmawk; if it is not installed, +local changes to the grammar will be ignored. + +./configure; make + +On top of usual scconfig arguments, ./configure accepts --numeric=TYPE, +where TYPE is int or double. Default is double. This switch affects +what type libmawk stores numbers in. + +3. 
installation + +Run "make install" or "make linstall". The linstall version +will use symlinks instead of actual copying of files which is useful if +you develop libmawk, the library itself. + +Debian package can be installed from http://repo.hu/debian. + +4. Compatibility with mawk + +Compatibility with mawk is maintained to some degree. Currently libmawk +offers a small set of extra features on awk level while providing +everything that mawk provides. A valid mawk script will work with +libmawk/lmawk without modification. However, the new features will work +only with libmawk/lmawk and not mawk, so portable scripts shouldn't +depend on them. All extensions are clearly marked in the manual. + +Conclusion: libmawk will compile and install mawk executable, which is +backward compatible with mawk executable but also adds some extension +features. + +Awklib depends on one of the libmawk features (include), thus awklib +scripts won't work with other awk implementations without tweaking. diff --git a/Release_notes b/Release_notes new file mode 100644 index 0000000..eebcfd1 --- /dev/null +++ b/Release_notes @@ -0,0 +1,5 @@ +Release notes for libmawk 1.0.0 + +This release introduces runlimits, better floating point handling on +corner cases (i.e. NaN) and memory leak cleanups. + diff --git a/configure b/configure new file mode 100755 index 0000000..a960b35 --- /dev/null +++ b/configure @@ -0,0 +1,3 @@ +#!/bin/sh +cd scconfig && make && ./configure "$@" + diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..a4ce05e --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,7 @@ +all: + +include ../src/libmawk/Makefile.conf + +install: + $(MKDIR) $(DOCDIR) + $(CP) $(PWD)/*.html $(DOCDIR) diff --git a/doc/Release_howto b/doc/Release_howto new file mode 100644 index 0000000..a225262 --- /dev/null +++ b/doc/Release_howto @@ -0,0 +1,5 @@ +1. update the changelog +2. update the release notes +3. change the version number in src/scconfig/hooks.c +4. 
test a fresh checkout +5. svn tag diff --git a/doc/TODO b/doc/TODO new file mode 100644 index 0000000..96264eb --- /dev/null +++ b/doc/TODO @@ -0,0 +1,77 @@ +000. full doc rewrite + +Features: + - c->awk func call should be able to pass arrays + +00. vio rewrite + - add all vio in a linked list in MAWK for garbage collection at mawk_uninit + - add a force-cleanup hook for this; but also replace mawk_close_out_pipes() + - hooks? wrapper? + - document file_name_rewrite + - document vio + +0. bugs + - string ref crash: + for(n = 0; n < 1000000; n++) A[n] = "foo" + if refcount of "foo" reaches 65535, it crashes + - mpow2 should be static + +1. restrictions + - detect and use LDFLAGS -dynamic + - split compile and run into separate libs: + - implement a Single Safe number->string converter; + grep for OFMT and CONVFMT and INT_FMT to find all the buggy sprintf based + implementations + - split da_bin to exec and comp + - fix error.c: it shouldn't depend on stdio.h and it shouldn't print + to stderr anyway (incompatible with the lib idea) + - check whether int-compiled lmawk handles OFMT/CONVFMT properly + - floating point: + - try to find a platform with FPE for overflow and test + - consider a -W nandebug option so that the user knows where the script + went wrong; but he could also just check from the script + - if isinf() is available, check for inf() result and convert them to nan + + +1.5. + - check all zmalloc() and zrealloc() calls - they may return NULL and callers + should return in that case so that runtime error takes over + +2. porting + - test on UNIX + - provide a real alternative to realpath() + +3.0 extend arrays + - array copy + - array in array (for orig implementation only?) + - length(array) as in gawk? POSIX: length() works on strings only + update test_3rd funlen accordingly! 
+ +3.1 features, minor bugs + - debugging (gdb scripts); location-file instruction to track src file changes + - consider printf with no arg to work like in gawk; also check posix about this + test_3rd: printf0 + - decide whether regex "^+" (and "^*"?) should be accepted and treated as + plain + and * at the beginning of the string; update test_3rd reindops + (check posix regex) + - decide whether regex should support binary; related tests to update: + test_3rd jared, regx8bit + - posix FS point 3. requires that ^ work in FS; check test_3rd uparrfs + - introduce a new symtab flag for remembering builtin vars and arrays; + when -W dumpsym, do not dump these unless verbose + - introduce a -W dumpallsym (for verbose symdumps) + - write regression test for flush() (it used to pass the wrong pointer) + +4. lib fineprint + - expose mawk_append_input_file + +5. optimization + - mawk_find_bi_ptr(): use perfect hash instead of linear search + does it really matter? + - peephole: + - 'print "a", a, "b", b, "c", c' results in push/cat pairs; + make the parser emit a lot of pushes and replace cat with catN + - 'expr = 1' will add a pop; there should be another type of assignment + that doesn't push anything + - replace tail recursion (call+ret) + diff --git a/doc/autotools.html b/doc/autotools.html new file mode 100644 index 0000000..be8a329 --- /dev/null +++ b/doc/autotools.html @@ -0,0 +1,22 @@ + + +

Why not use autotools

+

+I believe autotools is a wrong design and a poor implementation, partly +trying to solve the wrong problem. The fix for this is a better design. +For example scconfig - which is the system that configures libmawk. It +can do everything it needs to do (yes, including cross compilation). +

+Mawk generally doesn't require too many special things and should compile +fine on POSIX systems. If it doesn't compile for you, you have the following +choices: +

    +
  1. you can contact me, report bugs, so I can fix scconfig +
  2. you can bypass scconfig and manually create Makefile.conf + and conf.h for your system yourself (won't help much in + fixing scconfig, tho) +
  3. you can create and maintain your own autotools version (but official + libmawk will not include support for autotools) +
+ + diff --git a/doc/developer/README b/doc/developer/README new file mode 100644 index 0000000..1433e34 --- /dev/null +++ b/doc/developer/README @@ -0,0 +1,6 @@ +This directory hosts documentation about libmawk internals. + +These files are useful for developers of libmawk and in some special +cases for application developers if they plan to use advanced features +of libmawk. + diff --git a/doc/developer/array_vs_func.txt b/doc/developer/array_vs_func.txt new file mode 100644 index 0000000..77d5000 --- /dev/null +++ b/doc/developer/array_vs_func.txt @@ -0,0 +1,114 @@ +API: Virtual array vs. function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. Introduction: virtual arrays + +Libmawk implements support for virtual arrays: arrays that do not simply +store data in a hash. This feature is designed for maximal flexibility: +awk arrays have a set of hooks (function pointers) and whenever the +bytecode interpreter has to read, list or modify an array, it will +call the hook functions. The original array implementation is just +a set of hooks simply storing data without side effects. The ENVIRON[] +array is set up with another set of hooks that syncs the environment +with awk array (and calls the original hooks for the actual data storage). + + +2. API: arrays are function calls at the end of the day + +This also means that an application developer has two alternative paths +to provide bindings to the application code: + - explicit function calls + - implicit function calls through an awk array + +For example if the application wants to expose direct I/O port access, +it may: + - implement functions io_out(port, value) and io_in(port) + - implement an array IO[port] + +However, having side effects is usually not desirable. An awk program that +uses functions not declared in the awk code makes it clear that these functions +are external, whereas an array may be just a global awk variable, side effects +are not obvious. + + +3. how to choose? 
+ +While technically the two ways of the API design are equivalent in the sense +that both end up in function calls, there are always considerations that +may make one better than the other for a specific application. Below are +the pros and cons: + +Function calls, pro: + - easy to recognize external calls in the awk code + - can implement much more than lookup, set and list + - may provide faster listing + +Function calls, con: + - longer awk source + - when there are multiple different implementations under different names, + an awk function that needs to operate on all may need to get a function + name prefix and do dynamic function calls that makes the awk + code look more complicated + +Array, pro: + - simple and short awk code, especially for listing ("for in") + - when there are multiple different implementation, passing one of them + to an awk function (by reference) hides the differences, keeping awk + function code simple + - the whole set of data can be handled together: output of split, generic + array print or load functions + +Array, con: + - hidden side effects (risks awk source readability) + - always have to implement lookup, set and list + +NOTE: a major advantage of arrays is listing (the "for in" construction). +In mawk this is implemented by saving a list of all indices that exist at +the time of entering the loop. While most of the time this means duplicating +string references only, it may still be slow and may take considerable amount +of memory if the array is large. What counts as large may vary, but generally +a 10^6 indices may cause memory allocations in the megabyte range already. + +In practice the following considerations could easily decide the question: + A. if there are more operations than lookup/set/list, use functions + B. 
if only set/get is required, check if listing looks useful or not; + if useful, go for arrays (where listing is done via "for in"); + similarly: if there's a function based API for set/get/list, reconsider + using an array instead of custom listing. Unless arrays are large! + C. are generic array functions useful in common applications? If so, arrays + may be better. Generic array functions include: + - awk function that prints all indices of an array + - awk function doing some complex lookup, e.g. regex search on all indices + - loading the array from a string using split(); useful when indices + are small integers, typically counting from 0 or 1 + D. would there be alternative implementations and generic awk functions + operating on them depending on their arguments? If so, arrays may be better + as they can be passed as reference + +4. examples + +According to these, the I/O port example is better implemented with functions +as arrays offer no benefit in any of the above points: + A. has only set and get, at this point arrays are as good as functions + B. "for in" listing is not a typical application: array has no benefit + C. printing all ports is rarely useful; complex lookups are not common; + loading I/O space with split() is not useful; + no obvious example of generic array code being useful on an IO array: + array has no benefit + D. having multiple alternative I/O spaces and passing one of these + to an awk function as array is not probable: array has no benefit + +An example where array is more suitable is an interface for network interfaces +(ifconfig): arrays NIC_IP[], NIC_NETMASK[], NIC_MTU[], etc, indexed by +the name of the nic: + A. has lookup, set and list; array is as good as functions + B. it's a reasonable application to list all interfaces: "for in" is useful, + array looks better + C. printing all interfaces makes sense; complex lookup + (e.g. "all alias interfaces") makes sense; loading the array may make + sense (e.g. 
restoring network settings); split wouldn't work, tho; array + looks better + D. no obvious alternative arrays to be passed in arg; array has no benefit + +In point A. and D. arrays are not better than functions (but not worse either), +but in B. and C. arrays definitely have an advantage for this app. diff --git a/doc/developer/vm.html b/doc/developer/vm.html new file mode 100644 index 0000000..2ec18af --- /dev/null +++ b/doc/developer/vm.html @@ -0,0 +1,231 @@ + + +

The Virtual Machine (VM)

+ +The VM lives in execute.c function mawk_execute_(). +

+In principle it takes an array of instructions (INST), executing them +one by one, operating on the current evaluation +stack (or stack for short). Most of the execution really goes in +order as the compiler prepares up everything to achieve linearity. +This makes the execution loop relatively simple and efficient: it is +just a large while(dont_need_to_exit) execute_next_instruction;. +The actual instruction execution is a real large switch on the instruction +opcode. +

+A typical example of how this is implemented in practice can be +obtained by running lmawk -Wdump -f test.awk on some simple arithmetic +script: + + +
awk asm (VM instructions) +
+
+BEGIN { a = 3 + 4 * 5 }
+
+
+
+BEGIN
+000 pusha	a
+002 pushd	3
+004 pushd	4
+006 pushd	5
+008 mul
+009 add
+010 assign
+011 pop
+012 exit0
+
+
+

+First the lvalue (target variable, left side) of the assignment is pushed, +then the expression (right side). The stack is, from top to down: {5, 4, 3, a}. +The top of the stack is 5, the second element is 4 by the time mul runs. +Mul will replace these two elements by 20, decreasing the stack size by +one, leaving the result on the top. Next add does a similar job, +replacing the top two items of the stack {20, 3} with their sum, 23. At +the end assign runs on the stack {23, a}, removing both items, copying +the value 23 to the global variable a. At the end it also puts +the result on the top of the stack, leaving the stack as {23} - this is +the result (output) of the assignment operation. Since the script doesn't +need to use the result, it runs a pop that removes and discards +the top item, leaving the stack empty. Since the script didn't have main +or END parts, the script can quit at this point, executing the exit0 +instruction (exiting with value 0 - the implicit exit). +

+NOTE: currently there's absolutely no optimization in the parser: everything +is calculated as written in the script and some values are saved just to be +discarded by the next instruction. +

+An interesting and important feature of execute_() is that it can save all +states and return to the caller at any point of the execution, i.e. +between any two instructions in the code. It can also resume execution from +the next instruction. This provides the host application full control over +scheduling the script, while the script can be built of sequential, blocking +instructions. + +

Jumps and conditions

+ +There are a few instructions that have to break linear execution flow, tho: + +

+Some of the above are implemented using conditional and unconditional jumps +to direct addresses (first column on the asm). For example a simple if +is compiled to contain 2 jumps: + + +
awk asm (VM instructions) +
+
+BEGIN {
+	if (bool)
+		a = 6
+	else
+		a = 7
+}
+
+
+
+BEGIN
+000 pushi	bool
+002 jz		012
+004 pusha	a
+006 pushd	6
+008 assign
+009 pop
+010 jmp		018
+012 pusha	a
+014 pushd	7
+016 assign
+017 pop
+018 exit0
+
+
+

+The first one is a conditional jump, "jump if [top of the stack is] zero" +(jz) - this makes the VM jump to the else branch at address 012. +The then branch ends in an unconditional jump to the next instruction +after the if (which is the implicit exit in this example), bypassing +the code of the else branch. +

+A jump is carried out by a simple modification of the "next instruction" +pointer before running the next iteration of the execution loop. + +

Recursion: function calls

+ +A slightly more complicated mechanism is used when jumps are of recursive +nature: the code has to jump to somewhere to do some work and then +return here and continue execution from the next instruction. A typical +example on this is executing user functions. +

+The original mawk implementation simply called mawk_execute_() recursively. +This meant the C compiler took care of saving all internal states on the +C stack for the detour. However, this wouldn't allow the code to be suspended +during such detour as it would be problematic to rebuild the C stack on a resume. +

+Thus libmawk's mawk_execute_() does not recurse on C level but on VM level. +For example when a function is called (using the call instruction): +

+

+Upon a ret instruction from the function: +

+ +

additional cases of recursion

+A range pattern is recursive as well: it needs to evaluate one or two pattern +matches before it decides whether to execute the action and/or update +the state. The range check starts with instruction _RANGE_CHK which +encodes expression code offsets and state in the next few instruction slots. It +recurses to evaluate expressions which are terminated by the _RANGE_STOP command. +Entering an expression evaluation is similar to a function call while +_RANGE_STOP is very similar to a ret. + +

deep recursion

+ At any time the eval stack has to have enough space after sp for + evaluating the longest awk expression. Any user function recursion will + bump sp leaving less room for expressions and further recursion. Relocating + the stack (with a realloc()) is not a good idea as there might be cell + pointers pointing to stack elements all around. + + Instead, mawk limits expression length at compile time to a fixed + maximum. If entering a new function would not leave at least this + amount of eval stack above sp, "deep recursion" is performed. This + starts by allocating an entire new stack for the call. Call stacking + saves enough pointers so that the code can switch back to the + previous stack easily. The allocation is done using zmalloc(), the overhead + is minimal. Since the original stack/stacks is/are kept intact, any + pointer stays valid. sp points into the new stack block and will increase + there until another deep recursion. + + This wastes some stack space on the old stack (potentially max expression + length minus one slot) but guarantees that: + - checks and special things need to be done only at entering/leaving functions + - even that happens rarely as a stack block is large enough to host + many functions besides the longest expression + - the stack can grow as big as it likes to, without having to allocate + one large block of memory + - all allocation is done from normal instance memory - allocation limit, + and auto cleanup at the end are granted + +

Resuming execute_()

+ +Since by far the most common thing in an embedded setup is to resume a +code interrupted by execution limit or a blocking getline, mawk_execute_() +does that by default. The top few slots in the eval stack are always a full +state dump, the same thing used in recursion. Entering mawk_execute_() pops this +section and initializes all internal states from it. When execution needs +to be interrupted, mawk_execute_() saves internal states onto the top +of the stack. + +

Entering execute_() (fresh start)

+ +Entering in run state involves setting up internal states pointing to +the beginning of the code in question, pushing these states on top of +the stack and calling mawk_execute_() which will "resume" from these states. +Similar thing happens when the application calls an awk function. +

+It may be that the execution is interrupted in the middle of running of +a large block of code, for example in BEGIN. The top of the stack holds +the current execution state so that mawk_execute_() will be able to +continue execution. The application may decide to run an awk +function before resuming the code: this operation would push a new +set of execution state on top of the stack and call mawk_execute_(). +When the current state finishes at the _RET instruction, mawk_execute_() +would take the next frame from the stack and would automatically resume +execution of the interrupted BEGIN block. This would cause the return value +of the function to be lost and would attempt to resume BEGIN as a side effect +of the function call! +

+To avoid such confusion, any new entry into mawk_execute_() is required to +push two sets of states: an EXEST_EXIT and the actual state it wants +to "resume" at (start execution at). When mawk_execute_() hits the _RET +instruction in the above example, it does pop the next frame, but that +frame would be the EXEST_EXIT which would cause it to interrupt immediately. +This leaves the stack exactly as it looked like before the function call, +and the application later may decide to resume execution. +

+Fresh start entries: +

+ + + diff --git a/doc/example.7libmawk.html b/doc/example.7libmawk.html new file mode 100644 index 0000000..a4ab012 --- /dev/null +++ b/doc/example.7libmawk.html @@ -0,0 +1,1102 @@ + + + + + + + + + +EXAMPLE + + + + +

EXAMPLE

+ +NAME
+SYNOPSIS
+DESCRIPTION
+Example application
+ +
+ + +

NAME + +

+ + +

libmawk example +− how to use the library

+ +

SYNOPSIS + +

+ + +

#include +<libmawk.h>

+ +

DESCRIPTION + +

+ + +

Libmawk is a +library that lets applications embed awk scripts using +the code of the popular implementation mawk. The +normal process is to call libmawk_initialize() to set up a +new mawk context (with script(s) loaded), then in the main +loop feed it using libmawk_append_input(). For "out of +band" communication, the program may also call +functions implemented in awk and read (or modify) global +variables of the awk script. The host application usually +will also bind some of its functions to the context using +libmawk_register_function, which allows the awk script to +call the host application’s functions directly as if they +were awk builtins or user defined functions. After the main +loop, the application destroys the context freeing up all +memory allocated for the script(s).

+ +

One context is +for one awk program. One awk program may consist of multiple +script files (just as with command line awk, with multiple +-f filename arguments). Libmawk is instance safe, the host +application may create multiple instances of contexts with +the same or with different set of awk scripts loaded. These +contexts are totally separate, no variables, functions or +any sort of states are shared. However, the host application +may provide means of communication between those scripts by +custom functions or by copying variable contents between +them.

+ +

Example application + +

+ + +

The following +example application creates a single context to demonstrate +all the above mentioned functionality.
+#include <stdio.h>
+#include <libmawk.h>

+ +

CELL +*blobb(mawk_state_t *context, CELL * sp, int a_args)
+{

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

int n;

+
+ + +

char buff[64];

+
+ + +

/* do something - print BLOBB and all arguments */

+
+ + +

printf("BLOBB! ");

+
+ + +

for(n = a_args-1; n >= 0; n--)

+
+ + + + +

printf("%d=’%s’ ", n, +libmawk_print_cell((sp-n), buff, sizeof(buff)));

+ + +

printf("\n");

+
+ + +

/* restore the stack (remove all arguments) */

+
+ + +

sp = sp - a_args;

+
+ + +

/* set a return value (find out where the return value +is on the stack,

+
+ + +

using libmawk_stackret()) */

+
+ + +

libmawk_set_cell(context, libmawk_stackret(sp), +'f', (double)1234);

+
+ + +

/* return the new stack pointer - should be the one that +it was before

+
+ + +

arguments had been pushed on the stack */

+
+ + +

return sp;

+
+ +

}

+ +

int main(int +argc, char **argv)
+{

+ + + + + + + + + + + +
+ + +

mawk_state_t *m;

+ + +

CELL ret, arrv, *vr;

+ + +

char buff[64];

+ +

/* the simpler +way is:

+ + + + + + + + + + + +
+ + +

m = libmawk_initialize(argc, argv);

+ + +

However, if the application wants to change the +environment before

+ + +

executing BEGIN, the following, 3 stage initialization +should be done:

+ +

*/

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

m = libmawk_initialize_stage1(); /* set up m */

+
+ + +

custom_array_init(m); /* set up a new builtin array with +side effects, before parsing scripts */

+
+ + +

m = libmawk_initialize_stage2(m, argc, argv); /* parse +args loads the script(s) */

+
+ + +

m = libmawk_initialize_stage3(m); /* execute BEGIN {} +*/

+
+ + +

ret.type = C_NOINIT;

+
+ + +

if (m != NULL) {

+
+ + + + +

/* test function call */

+
+ + + + +

if (libmawk_call_function(m, "func", &ret, +"dfs", (int)42, (double)1.234, (char *)"test +string.") == 0) {

+
+ + + + + +

printf("Return value of func is '%s'\n", +libmawk_print_cell(&ret, buff, sizeof(buff)));

+
+ + + + + +

libmawk_cell_destroy(m, &ret);

+
+ + + + +

}

+
+ + + + +

printf("Failed to call func()\n");

+
+ + + + +

/* this is the same function call with a different +syntax */

+
+ + + + +

{

+
+ + + + + +

int i = 42;

+
+ + + + + +

double d = 1.234;

+
+ + + + + +

char *s = "test string.";

+
+ + + + + +

void *args[] = {&i, &d, s};

+
+ + + + + +

if (libmawk_call_functionp(m, "func", +&ret, "dfs", args) != 0) {

+
+ + + + + + +

printf("Return value of func is '%s'\n", +libmawk_print_cell(&ret, buff, sizeof(buff)));

+ + + + + + +

libmawk_cell_destroy(m, &ret);

+ + + + + +

}

+
+ + + + +

}

+
+ + + + +

/* register a C function (resolved runtime) */

+
+ + + + +

if (libmawk_register_function(m, "blobb", +blobb) != 0) {

+
+ + + + + +

fprintf(stderr, "ERROR: Unable to register function +blobb\n");

+
+ + + + +

}

+
+ + + + +

/* run some data */

+
+ + + + +

libmawk_append_input(m, "This is a\nmultiline test +input\nput in the artificial input buffer.\n");

+
+ + + + +

libmawk_run_main(m);

+
+ + + + +

/* print var: scalar */

+
+ + + + +

vr = libmawk_get_var(m, "var");

+
+ + + + +

if (vr != NULL)

+
+ + + + + +

printf("Variable var = '%s'\n", +libmawk_print_cell(vr, buff, sizeof(buff)));

+
+ + + + +

else

+
+ + + + + +

printf("No such variable

+
+ + + + +

/* print var: array */

+
+ +

#warning TODO

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +

arrv.type = C_NOINIT;

+
+ + + + +

if (libmawk_get_array_at(m, "arr", +"hello", &arrv, 0) > 0)

+
+ + + + + +

printf("Variable arr[

+ + + + +

else

+
+ + + + + +

printf("No such variable

+ +

#warning todo: +array set

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +

/* set var: array; change the existing +(arr["hello"]) */

+
+ + +

//

+ + + +

if (ret != NULL)

+
+ + +

//

+ + + + +

libmawk_set_cell(m, ret, ’s’, +"WORLD");

+ + + + +

/* set var: array; create a new index */

+
+ +

#warning todo: array set

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

//

+ + + +

libmawk_get_array_at(m, "arr", +"bye", &arrv, 1);

+ + +

//

+ + + +

libmawk_set_cell(m, ret, ’s’, +"universe");

+ + + + +

/* run some more data */

+ + + + +

libmawk_append_input(m, "Second\n");

+ + + + +

libmawk_append_input(m, "run.\n");

+ + + + +

libmawk_run_main(m);

+ + + + +

custom_array_print(m);

+ + + + +

/* run end */

+ + + + +

libmawk_uninitialize(m);

+ + +

}

+
+ + +

else {

+
+ + + + +

printf("Init failed.\n");

+ + +

}

+
+ + +

printf("END\n");

+
+ + +

return 0 ;

+
+ +

}

+
+ + diff --git a/doc/execution.html b/doc/execution.html new file mode 100644 index 0000000..33fbd42 --- /dev/null +++ b/doc/execution.html @@ -0,0 +1,155 @@ + + +

awk script execution

+Libmawk runs the bytecode of the script in a virtual machine. +The VM takes the bytecode as a series of instructions that operate on data +stored on the execution stack and in global states of the script instance +(libmawk_state_t). +

+There is only one thing at a time an instance is doing, however that +one thing may be interrupted and resumed any time. This one thing is +always one of these: +

+

+BEGIN, END, main and awk functions are the four entry points of executing +the script. Normally BEGIN is run right after setting up the script, then +main is run on all input and END is run when the script exits, right +before uninitialization of the script instance. This is a 1:1 copy +of the standard way awk works. The fourth, calling awk functions directly +from the application is an extra entry point. +

+The script is not doing anything unless the application commands it to. Some +of the simplified API does this automatically, but the raw API (staged +init/uninit) always lets the app decide when to start running the script. +This document uses the term execution transaction for a call the application +makes to the API to start running a script. +

+Any execution related call is non-blocking, thus it will return after a +reasonable time spent running the script and will never get stuck running +an infinite loop. When such an API call returns, the return value +is a mawk_exec_result_t that indicates the reason of the return: +

+

+Execution transactions are collected on the evaluation stack. If +the application requests an execution and the API call returns before +finishing, the transaction is still active. The application is +free to initiate a new execution transaction, without +first finishing the previous one. However, the VM will always resume and +progress running the most recent execution transaction. This means +execution transactions are sort of nested. When the top, most recent +execution transaction finishes (return path 3), the next resume request +will go on with the previous transaction. +

+Note, however, that the script has global states. The most obvious state +is the exit state: if the script runs exit(), it will discard all open +transactions. For example consider a script that is running a main part +processing the input. When the application is in this phase, the topmost +transaction is always a "running main" transaction that returned +previously because there was no more input to be processed. If the +application calls an awk function that decides to do an exit(), that will +discard not only the function transaction but the pending "running main" +transaction as well. Whenever the application requests a resume on +the code, that will start running the END section. + + +

return path 1.: MAWK_EXER_INT_READ

+Assume stdin is a FIFO between the application and the script. The +first script tries to prefix each line: +
+{
+	print "prefix:", $0
+}
+
+The application fills the FIFO with some data that may contain one or +more full records, potentially ending with a partial (unterminated) +record. If the application resumes the script, it will try to +read all full records and process them. It will interrupt +execution and return MAWK_EXER_INT_READ the first time a full +record can't be read. This always happens "before the {}". +

+A slightly more complicated script prefixes odd and even lines differently: +

+{
+	print "odd:", $0
+	getline
+	print "even:", $0
+}
+
+This script may return with MAWK_EXER_INT_READ either before {} +or in the getline instruction. This means the application should not +assume that when main returns it was not in the middle of such +a block. (In the actual VM main starts with an implicit getline so +there's no difference between the two cases). +

+A similar situation is when an awk function is executing getline on a FIFO: +the application that calls the function shall not expect that the function +finishes and produces its return value in the initial execution request. +Instead the request will create a new execution transaction and +multiple resume calls may be needed until the function actually returns. +

+Obviously the application shall fill the FIFO while executing resumes: +if there is no new input and the script is waiting for new input, the +resume call will return immediately. + + +

return path 2.: MAWK_EXER_INT_RUNLIMIT

+When runlimit is set the VM returns after executing a certain amount of +instructions. The application shall decide whether to simply resume or +to stop executing the script. +

+This feature is useful when the application is implemented as a single +threaded async loop: running a blocking script would block the entire loop. + + +

return path 3.: MAWK_EXER_DONE or MAWK_EXER_FUNCRET

+When BEGIN or main or END finishes MAWK_EXER_DONE is returned. When +an awk function called by the application returns, MAWK_EXER_FUNCRET +is returned and the retc argument is filled with the return value cell +(which may be of cell type NOINIT in case there was no return value). +

+The application shall never expect the initial call that +created the new execution transaction will end in +MAWK_EXER_DONE or MAWK_EXER_FUNCRET; when it does not, +a subsequent resume call eventually will. + +

return path 4.: MAWK_EXER_EXIT

+Similar to MAWK_EXER_DONE, but means the script called exit. +This is legal from even an awk function call, in which case the +function will never have a return value (as the code can not be resumed +any more). Normal awk rules apply: calling exit() from BEGIN or main +(or subsequent functions, called by the script or the application) puts +the script in exit mode and next resume will run END. Calling exit from +END will exit immediately leaving the script in non-runnable state. + + +

conclusion: script execution

+It is safe to assume calling any script execution will return with +a conclusion if, and only if: +
    +
  • the script is not allowed to use getline on FIFOs (which can not be guaranteed!) or there are no FIFOs or otherwise blocking input (i.e. all files are plain files); and +
  • there is no run limit configured +
+

+Since these are not guaranteed in most common use cases, the code should prepare +to: +

    +
  • start executing the code and check if it's already finished +
  • resume until it actually does finish +
  • if the script returned MAWK_EXER_INT_READ: fill FIFOs or if that's not possible stop resuming as there won't be any progress +
+

+Thus the following C pseudo-code should be used: +

+TODO
+
diff --git a/doc/index.html b/doc/index.html new file mode 100644 index 0000000..68bad5f --- /dev/null +++ b/doc/index.html @@ -0,0 +1,28 @@ + + +

Table Of Contents

+

Manual pages

+ +

Design decisions

+ + + + diff --git a/doc/libmawk_append_input.3libmawk.html b/doc/libmawk_append_input.3libmawk.html new file mode 100644 index 0000000..c354eda --- /dev/null +++ b/doc/libmawk_append_input.3libmawk.html @@ -0,0 +1,94 @@ + + + + + + + + + +LIBMAWK_APPEND_INPUT + + + + +

LIBMAWK_APPEND_INPUT

+ +NAME
+SYNOPSIS
+DESCRIPTION
+SEE ALSO
+ +
+ + +

NAME + +

+ + + +

libmawk_append_input +− append a string to an input buffer

+ +

SYNOPSIS + +

+ + +

#include +<libmawk.h>

+ +

void +libmawk_append_input(mawk_state_t *m, const +char *input_str);

+ +

void +libmawk_append_ninput(mawk_state_t *m, const +char *input, int len);

+ +

DESCRIPTION + +

+ + +

The +libmawk_append_input() and +libmawk_append_ninput() functions allow the +application to fill the input buffer of a libmawk context. +No record separator is appended, only the bytes denoted by +input_str or input, thus it is possible to append partial +records. Appending to the input doesn’t have the side +effect of any script being run. There may be multiple +libmawk_append_input() calls before a call to +libmawk_run_main(). The latter call is used to let the script +process the input buffer.

+ +

The only +difference between the two calls is the input format: +libmawk_append_input() expects a nul-terminated +string, whereas libmawk_append_ninput() takes +arbitrary binary data and its length.

+ +

Argument m is a +libmawk context previously returned by libmawk_initialize() +or libmawk_initialize_stage3().

+ +

SEE ALSO + +

+ + + +

libmawk_initialize_stage(3libmawk), +libmawk_initialize(3libmawk), +libmawk_run_main(3libmawk).

+
+ + diff --git a/doc/libmawk_call_function.3libmawk.html b/doc/libmawk_call_function.3libmawk.html new file mode 100644 index 0000000..f82ac85 --- /dev/null +++ b/doc/libmawk_call_function.3libmawk.html @@ -0,0 +1,106 @@ + + + + + + + + + +LIBMAWK_CALL_FUNCTION + + + + +

LIBMAWK_CALL_FUNCTION

+ +NAME
+SYNOPSIS
+DESCRIPTION
+RETURN VALUE
+SEE ALSO
+ +
+ + +

NAME + +

+ + + +

libmawk_call_function +− call an user defined (script) function

+ +

SYNOPSIS + +

+ + +

#include +<libmawk.h>

+ +

int +libmawk_call_function(mawk_state_t *MAWK, +const char *fname, CELL *res, +const char *argtypes, ...);
+int libmawk_call_functionp(mawk_state_t *
MAWK, +const char *fname, CELL *res, +const char *argtypes, void **args);

+ +

DESCRIPTION + +

+ + +

The +libmawk_call_function() function looks up a user +defined awk function called fname, fills the stack +with arguments converted from the varargs and calls the +function. The libmawk_call_functionp() function performs the +same action but avoids using varargs by requiring an array of +generic pointers to the function arguments.

+ +

Argtype is a +zero terminated string for both functions, each character +corresponding to an argument. Type characters are described +in libmawk_set_cell() manual page.

+ +

If res is +non-NULL, it is cell_destroyed (regardless of errors) and +the return value of the user function is copied into it. The +caller shall run libmawk_cell_destroy on it.

+ +

Argument m is a +libmawk context previously returned by libmawk_initialize() +or libmawk_initialize_stage3().

+ +

RETURN VALUE + +

+ + +

A pointer to +the cell returned by the user function. The cell +returned must be destroyed by the application using +libmawk_cell_destroy.

+ +

SEE ALSO + +

+ + + +

libmawk_initialize_stage(3libmawk), +libmawk_initialize(3libmawk), +libmawk_cell_destroy(3libmawk), +libmawk_set_cell(3libmawk),

+
+ + diff --git a/doc/libmawk_cell_destroy.3libmawk.html b/doc/libmawk_cell_destroy.3libmawk.html new file mode 100644 index 0000000..448912a --- /dev/null +++ b/doc/libmawk_cell_destroy.3libmawk.html @@ -0,0 +1,78 @@ + + + + + + + + + +LIBMAWK_CELL_DESTROY + + + + +

LIBMAWK_CELL_DESTROY

+ +NAME
+SYNOPSIS
+DESCRIPTION
+SEE ALSO
+ +
+ + +

NAME + +

+ + + +

libmawk_cell_destroy +− free all memory associated with a cell

+ +

SYNOPSIS + +

+ + +

#include +<libmawk.h>

+ +

void +libmawk_cell_destroy(mawk_state_t *m, CELL +*c);

+ +

DESCRIPTION + +

+ + +

The +libmawk_cell_destroy() function frees all memory +allocated to store a mawk cell. It is useful with some of +the libmawk calls that return a newly allocated cell, such +as the libmawk_call_function() call.

+ +

Argument m is a +libmawk context previously returned by libmawk_initialize() +or libmawk_initialize_stage3().

+ +

SEE ALSO + +

+ + + +

libmawk_initialize_stage(3libmawk), +libmawk_initialize(3libmawk), +libmawk_call_function(3libmawk).

+
+ + diff --git a/doc/libmawk_get_var.3libmawk.html b/doc/libmawk_get_var.3libmawk.html new file mode 100644 index 0000000..edf6a82 --- /dev/null +++ b/doc/libmawk_get_var.3libmawk.html @@ -0,0 +1,98 @@ + + + + + + + + + +LIBMAWK_GET_VAR + + + + +

LIBMAWK_GET_VAR

+ +NAME
+SYNOPSIS
+DESCRIPTION
+SEE ALSO
+ +
+ + +

NAME + +

+ + + +

libmawk_get_var +− returns a pointer to a mawk variable

+ +

SYNOPSIS + +

+ + +

#include +<libmawk.h>

+ +

CELL +*libmawk_get_var(mawk_state_t *m, const char +*vname);
+int libmawk_get_array_at(mawk_state_t *
m, +const char *arr_name,
+const char *
idx, CELL +*res, int alloc);

+ +

DESCRIPTION + +

+ + +

The +libmawk_get_var() function returns a pointer to a +mawk cell that represents the global variable with name +passed in vname in the given context. The returned +CELL should never be free’d or destroyed. Function +libmawk_print_cell may be used for converting the cell to +string. The caller should not change the type of cell but is +free to change the value.

+ +

Function +libmawk_get_array_at() performs the same operation +for an element of an array. -1 is returned if +arr_name is not an array or upon an error. If +idx is not an existing index in the array it is +allocated if alloc is non-zero. If res is not +NULL, it is destroyed (regardless of the return value) and +if the index exists (or is created by the call), is loaded +with the value. The caller needs to destroy res after +use. Since res is destroyed when non-NULL, it must be +a valid cell with valid type.

+ +

Argument m is a +libmawk context previously returned by libmawk_initialize() +or libmawk_initialize_stage3().

+ +

SEE ALSO + +

+ + + +

libmawk_initialize_stage(3libmawk), +libmawk_initialize(3libmawk), +libmawk_call_function(3libmawk), +libmawk_print_cell(3libmawk).

+
+ + diff --git a/doc/libmawk_initialize.3libmawk.html b/doc/libmawk_initialize.3libmawk.html new file mode 100644 index 0000000..ab75f64 --- /dev/null +++ b/doc/libmawk_initialize.3libmawk.html @@ -0,0 +1,86 @@ + + + + + + + + + +LIBMAWK_INITIALIZE + + + + +

LIBMAWK_INITIALIZE

+ +NAME
+SYNOPSIS
+DESCRIPTION
+RETURN VALUE
+SEE ALSO
+ +
+ + +

NAME + +

+ + + +

libmawk_initialize +− create a new libmawk context

+ +

SYNOPSIS + +

+ + +

#include +<libmawk.h>

+ + +

mawk_state_t +*libmawk_initialize(int s, char +*argv[]);

+ +

DESCRIPTION + +

+ + +

The +libmawk_initialize() function returns a pointer to a +newly created libmawk context. Any amount of libmawk +contexts can live in parallel in an application. Arguments +are the same as for a command line mawk session. Scripts are +loaded (either from command line or from files using -f), +variables are set (with -v), special options are set (with +-W), etc.

+ +

RETURN VALUE + +

+ + +

A pointer to a +new libmawk context or NULL on error.

+ +

SEE ALSO + +

+ + + +

libmawk_initialize_stage(3libmawk), +libmawk_uninitialize(3libmawk),

+
+ + diff --git a/doc/libmawk_initialize_stage.3libmawk.html b/doc/libmawk_initialize_stage.3libmawk.html new file mode 100644 index 0000000..5009553 --- /dev/null +++ b/doc/libmawk_initialize_stage.3libmawk.html @@ -0,0 +1,135 @@ + + + + + + + + + +LIBMAWK_INITIALIZE_STAGE + + + + +

LIBMAWK_INITIALIZE_STAGE

+ +NAME
+SYNOPSIS
+DESCRIPTION
+RETURN VALUE
+SEE ALSO
+ +
+ + +

NAME + +

+ + + +

libmawk_initialize_stage* +− create a new libmawk context in 3 stages

+ +

SYNOPSIS + +

+ + +

#include +<libmawk.h>

+ + +

mawk_state_t +*libmawk_initialize_stage1(void);

+ + +

mawk_state_t +*libmawk_initialize_stage2(mawk_state_t * +m, int s, +char *argv[]);

+ + +

mawk_state_t +*libmawk_initialize_stage3(mawk_state_t * +m);

+ +

DESCRIPTION + +

+ + +

The +libmawk_initialize_stage*() functions together do the +same as libmawk_initialize() but allow the application to +take actions between different stages. +libmawk_initialize_stage1() returns a pointer to a +newly created libmawk context. Any amount of libmawk +contexts can live in parallel in an application.

+ + +

libmawk_initialize_stage2() +can be called after a successful stage1 call.
+Stage2 is responsible for processing the command line +arguments and loading any script.

+ +

Arguments are +the same as for a command line mawk session. Scripts are +loaded (either from command line or from files using -f), +variables are set (with -v), special options are set (with +-W), etc. Unlike with libmawk_initialize(), the application +may decide not to provide any script at this stage. All +command line arguments are processed.

+ +

The most common +case is that the application calls stage1 with no script, +then already having a context makes some manipulations on it +(for example registers some C functions that would be +already called in the BEGIN part of the script that will be +later loaded). Optionally before calling stage2 the +application loads the actual script(s) using +mawk_append_input_file().

+ + +

libmawk_initialize_stage3() +is called as a final step of the three-stage initialization +process. Stage3 is responsible for running all the BEGIN +parts of all scripts loaded at stage1 or stage2. It is +useful to have stage3 in a separate call to allow +applications to manipulate the context right before +initializing the scripts.

+ +

Stage2 gets the +pointer returned by stage1 and stage3 gets the pointer +returned by stage2. Subsequent calls to libmawk functions +should get the pointer returned by stage3.

+ +

RETURN VALUE + +

+ + +

At stage 1 a +pointer to a new libmawk context or NULL on error. +Subsequent stages will return the same pointer or NULL on +error.

+ +

SEE ALSO + +

+ + + +

libmawk_initialize(3libmawk), +libmawk_uninitialize(3libmawk), +mawk_append_input_file(3libmawk).

+
+ + diff --git a/doc/libmawk_register_function.3libmawk.html b/doc/libmawk_register_function.3libmawk.html new file mode 100644 index 0000000..333beed --- /dev/null +++ b/doc/libmawk_register_function.3libmawk.html @@ -0,0 +1,119 @@ + + + + + + + + + +LIBMAWK_REGISTER_FUNCTION + + + + +

LIBMAWK_REGISTER_FUNCTION

+ +NAME
+SYNOPSIS
+DESCRIPTION
+RETURN VALUE
+SEE ALSO
+ +
+ + +

NAME + +

+ + + +

libmawk_register_function +− registers a C function with a callback

+ +

SYNOPSIS + +

+ + +

#include +<libmawk.h>

+ +

typedef CELL +*libmawk_c_function(mawk_state_t *m, CELL +*sp, int a_args);
+int libmawk_register_function(mawk_state_t +*
MAWK, const char *fname, +libmawk_c_function *callback);
+CELL *libmawk_stackret(CELL +*
original_sp);

+ +

DESCRIPTION + +

+ + +

The +libmawk_register_function() call registers a user +defined function provided by the host application in a mawk +context so that it acts exactly like user defined functions +written in awk. The name of the new function is given in +fname and should not match any of the user defined +function names in the awk script.

+ +

When the user +function is called back, argument sp is the stack +pointer and a_args holds the number of arguments. The +user function is responsible for managing the stack: it +should pop all arguments before returning.

+ +

The user +function should also generate a return value, which is done +by calling libmawk_set_cell() on the stack slot returned by +libmawk_stackret. Libmawk_stackret should be called with the +modified sp after popping all arguments.

+ +

Argument m is a +libmawk context previously returned by libmawk_initialize() +or libmawk_initialize_stage3().

+ +

For more +information about user function callbacks, especially on +stack handling, see manual page example(3libmawk).

+ +

RETURN VALUE + +

+ + +

The user +function should return the stack pointer after popping all +arguments.

+ +

The +libmawk_register_function call returns 0 on success.

+ +

The +libmawk_stackret call returns a stack pointer to the slot where +the user function should store its return value.

+ +

SEE ALSO + +

+ + + +

libmawk_initialize_stage(3libmawk), +libmawk_initialize(3libmawk), +libmawk_set_cell(3libmawk), +libmawk_print_cell(3libmawk).

+
+ + diff --git a/doc/libmawk_run_main.3libmawk.html b/doc/libmawk_run_main.3libmawk.html new file mode 100644 index 0000000..95f433f --- /dev/null +++ b/doc/libmawk_run_main.3libmawk.html @@ -0,0 +1,81 @@ + + + + + + + + + +LIBMAWK_RUN_MAIN + + + + +

LIBMAWK_RUN_MAIN

+ +NAME
+SYNOPSIS
+DESCRIPTION
+SEE ALSO
+ +
+ + +

NAME + +

+ + + +

libmawk_run_main +− run main parts of a script

+ +

SYNOPSIS + +

+ + +

#include +<libmawk.h>

+ +

void +libmawk_run_main(mawk_state_t *m);

+ +

DESCRIPTION + +

+ + +

The +libmawk_run_main() function attempts to take and parse the +next input record and runs all main parts of the script that +match. If there are multiple full records in the input +buffer, the process repeats until the buffer becomes empty +or contains a partial record. If there is no full record in +the buffer, the call returns with nothing done. The call +itself never blocks, but the script may. The input buffer +may be filled using the libmawk_append_input() call.

+ +

Argument m is a +libmawk context previously returned by libmawk_initialize() +or libmawk_initialize_stage3().

+ +

SEE ALSO + +

+ + + +

libmawk_initialize_stage(3libmawk), +libmawk_initialize(3libmawk), +libmawk_append_input(3libmawk).

+
+ + diff --git a/doc/libmawk_set_cell.3libmawk.html b/doc/libmawk_set_cell.3libmawk.html new file mode 100644 index 0000000..16aa778 --- /dev/null +++ b/doc/libmawk_set_cell.3libmawk.html @@ -0,0 +1,101 @@ + + + + + + + + + +LIBMAWK_SET_CELL + + + + +

LIBMAWK_SET_CELL

+ +NAME
+SYNOPSIS
+DESCRIPTION
+RETURN VALUE
+SEE ALSO
+ +
+ + +

NAME + +

+ + + +

libmawk_set_cell +− set the value of a mawk cell.

+ +

SYNOPSIS + +

+ + +

#include +<libmawk.h>

+ +

CELL +*libmawk_set_cell(mawk_state_t *m, CELL +*cell, const +char argtype, ...);
+CELL *libmawk_set_cellp(mawk_state_t *
m, CELL +*cell, const char argtype, void +*argp);

+ +

DESCRIPTION + +

+ + +

The +libmawk_set_cell() function modifies the value of a +mawk cell (variable). Argument argtype is a format character +that describes the type of the payload (accessed through +vararg).

+ +

The +libmawk_set_cellp() function performs the same action +but accepts a generic pointer to the payload.

+ +

Format +character is one of the following:
+’d’ for int payload
+’f’ for double payload
+’s’ for (zero terminated) char * payload.

+ +

Argument m is a libmawk context +previously returned by libmawk_initialize() or +libmawk_initialize_stage3().

+ +

RETURN VALUE + +

+ + +

A pointer to +the cell modified.

+ +

SEE ALSO + +

+ + + +

libmawk_initialize_stage(3libmawk), +libmawk_initialize(3libmawk), +libmawk_get_var(3libmawk).

+
+ + diff --git a/doc/libmawk_uninitialize.3libmawk.html b/doc/libmawk_uninitialize.3libmawk.html new file mode 100644 index 0000000..0c6d251 --- /dev/null +++ b/doc/libmawk_uninitialize.3libmawk.html @@ -0,0 +1,73 @@ + + + + + + + + + +LIBMAWK_UNINITIALIZE + + + + +

LIBMAWK_UNINITIALIZE

+ +NAME
+SYNOPSIS
+DESCRIPTION
+SEE ALSO
+ +
+ + +

NAME + +

+ + + +

libmawk_uninitialize +− destroy a libmawk context

+ +

SYNOPSIS + +

+ + +

#include +<libmawk.h>

+ +

void +libmawk_uninitialize(mawk_state_t * +m);

+ +

DESCRIPTION + +

+ + +

The +libmawk_uninitialize() function destroys a context +previously created using libmawk_initialize() or +libmawk_initialize_stage1() call. It unloads scripts and +frees all memory of the context.

+ +

SEE ALSO + +

+ + + +

libmawk_initialize_stage(3libmawk), +libmawk_initialize(3libmawk).

+
+ + diff --git a/doc/lmawk.1.html b/doc/lmawk.1.html new file mode 100644 index 0000000..e5b7a2b --- /dev/null +++ b/doc/lmawk.1.html @@ -0,0 +1,2998 @@ + + + + + + + + + +LMAWK + + + + +

LMAWK

+ +NAME
+SYNOPSIS
+DESCRIPTION
+OPTIONS
+THE AWK LANGUAGE
+EXAMPLES
+COMPATIBILITY ISSUES
+SEE ALSO
+BUGS
+AUTHOR
+ +
+ + +

NAME + +

+ + +

lmawk − +pattern scanning and text processing language

+ +

SYNOPSIS + +

+ + +

lmawk +[−W option] [−F +value] [−v var=value] +[−−] ’program text’ [file ...] +
+lmawk
[−W option] [−F +value] [−v var=value] +[−f program-file] [−−] [file +...]

+ +

DESCRIPTION + +

+ + +

lmawk is +an interpreter for the AWK Programming Language derived from +mawk. The AWK language is useful for manipulation of data +files, text retrieval and processing, and for prototyping +and experimenting with algorithms. lmawk is a new +awk meaning it implements the AWK language as defined in +Aho, Kernighan and Weinberger, The AWK Programming +Language, Addison-Wesley Publishing, 1988. (Hereafter +referred to as the AWK book.) mawk conforms to the +Posix 1003.2 (draft 11.3) definition of the AWK language +which contains a few features not described in the AWK book, +and mawk provides a small number of extensions.

+ +

An AWK program +is a sequence of pattern {action} pairs and function +definitions. Short programs are entered on the command line +usually enclosed in ’ ’ to avoid shell +interpretation. Longer programs can be read in from a file +with the −f option. Data input is read from the list +of files on the command line or from standard input when the +list is empty. The input is broken into records as +determined by the record separator variable, RS. +Initially, RS = "\n" and records are +synonymous with lines. Each record is compared against each +pattern and if it matches, the program text for +{action} is executed.

+ +

OPTIONS + +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

−F value

+ + +

sets the field separator, +FS, to value.

+ + +

−f file

+ + +

Program text is read from file instead of from +the command line. Multiple −f options are +allowed. As a libmawk extension, if file name starts with +plus (’+’), it is not loaded if the same file +has been loaded already by a previous -f or include from any +of the scripts already loaded.

+ + +

−b file

+ + +

Program bytecode is read from file. Multiple +−b options are allowed. Bytecode can be +generated using -Wcompile. Libmawk may refuse to load +bytecode generated on a different system if byte order, type +sizes or dump version differs.

+ + +

−v var=value

+ + +

assigns value to program variable var.

+ + +

−−

+ + +

indicates the unambiguous end of options.

+ +

The above +options will be available with any Posix compatible +implementation of AWK, and implementation specific options +are prefaced with −W. lmawk provides +the following:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

−W version

+ + +

lmawk writes its version +and copyright to stdout and compiled limits to stderr and +exits 0.

+ + +

−W debug

+ + +

include location info in the compiled code; location +information is visible in the dump and when debugging +libmawk.

+ + +

−W dump

+ + +

writes an assembler like listing of the internal +representation of the program to stdout and exits 0 (on +successful compilation).

+ + +

−W dumpsym

+ + +

writes a list of global symbols to stdout and exits 0 +(on successful compilation).

+ + +

−W compile

+ + +

writes a binary dump of the bytecode to stdout. This +bytecode can be loaded using the −b switch.

+ + +

−W interactive

+ + +

sets unbuffered writes to stdout and line buffered reads +from stdin. Records from stdin are lines regardless of the +value of RS.

+ + +

−W maxmem=num

+ + +

limit dynamic memory allocation during compilation and +execution to num bytes and exit with +out-of-memory error if more memory is to be allocated. +Optional suffixes are k for kilobyte and m for megabyte. 0 +means unlimited, which is also the default.

+ + +

−W exec file

+ + +

Program text is read from file and this is the +last option. Useful on systems that support the #! +"magic number" convention for executable +scripts.

+ + +

−W sprintf=num

+ + +

adjusts the size of lmawk’s internal +sprintf buffer to num bytes. More than rare use of +this option indicates lmawk should be recompiled.

+ + +

−W posix_space

+ + +

forces lmawk not to consider ’\n’ to +be space.

+ +

The short forms +−W[vdiesp] are recognized and on some systems +−We is mandatory to avoid command line length +limitations.

+ +

THE AWK LANGUAGE + +

+ + +

1. Program +structure
+An AWK program is a sequence of pattern {action} +pairs and user function definitions.

+ +

A pattern can +be:

+ +

BEGIN
+END

+expression
+expression , expression

+ +

One, but not +both, of pattern {action} can be omitted. If +{action} is omitted it is implicitly { print }. If +pattern is omitted, then it is implicitly matched. +BEGIN and END patterns require an action.

+ +

Statements are +terminated by newlines, semi-colons or both. Groups of +statements such as actions or loop bodies are blocked via { +... } as in C. The last statement in a block doesn’t +need a terminator. Blank lines have no meaning; an empty +statement is terminated with a semi-colon. Long statements +can be continued with a backslash, \. A statement can be +broken without a backslash after a comma, left brace, +&&, ||, do, else, the right +parenthesis of an if, while or for +statement, and the right parenthesis of a function +definition. A comment starts with # and extends to, but does +not include the end of line.

+ +

The following +statements control program flow inside blocks.

+ +

if ( +expr ) statement

+ +

if ( +expr ) statement else +statement

+ +

while ( +expr ) statement

+ +

do +statement while ( expr )

+ +

for ( +opt_expr ; opt_expr ; opt_expr ) +statement

+ +

for ( +var in array ) statement

+ + +

continue

+ + +

break

+ +

2. Data +types, conversion and comparison
+There are two basic data types, numeric and string. Numeric +constants can be integer like −2, decimal like 1.08, +or in scientific notation like −1.1e4 or .28E−3. +All numbers are represented internally and all computations +are done in floating point arithmetic. So for example, the +expression 0.2e2 == 20 is true and true is represented as +1.0.

+ +

String +constants are enclosed in double quotes.

+ +

"This is a +string with a newline at the end.\n"

+ +

Strings can be continued across +a line by escaping (\) the newline. The following escape +sequences are recognized.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +

\\

+ + + +

\

+ + + +

\"

+ + + +

"

+ + + +

\a

+ + + +

alert, ascii 7

+ + + +

\b

+ + + +

backspace, ascii 8

+ + + +

\t

+ + + +

tab, ascii 9

+ + + +

\n

+ + + +

newline, ascii 10

+ + + +

\v

+ + + +

vertical tab, ascii 11

+ + + +

\f

+ + + +

formfeed, ascii 12

+ + + +

\r

+ + + +

carriage return, ascii 13

+ + + +

\ddd

+ + + +

1, 2 or 3 octal digits for ascii ddd

+ + + +

\xhh

+ + + +

1 or 2 hex digits for ascii hh

+ +

If you escape +any other character \c, you get \c, i.e., lmawk +ignores the escape.

+ +

There are +really three basic data types; the third is number and +string which has both a numeric value and a string value +at the same time. User defined variables come into existence +when first referenced and are initialized to null, a +number and string value which has numeric value 0 and string +value "". Non-trivial number and string typed data +come from input and are typically stored in fields. (See +section 4).

+ +

The type of an +expression is determined by its context and automatic type +conversion occurs if needed. For example, to evaluate the +statements

+ + + + + +
+ + +

y = x + 2 ; z = x "hello"

+ +

The value +stored in variable y will be typed numeric. If x is not +numeric, the value read from x is converted to numeric +before it is added to 2 and stored in y. The value stored in +variable z will be typed string, and the value of x will be +converted to string if necessary and concatenated with +"hello". (Of course, the value and type stored in +x is not changed by any conversions.) A string expression is +converted to numeric using its longest numeric prefix as +with atof(3). A numeric expression is converted to +string by replacing expr with sprintf(CONVFMT, +expr), unless expr can be represented on the +host machine as an exact integer then it is converted to +sprintf("%d", expr). +Sprintf() is an AWK built-in that duplicates the +functionality of sprintf(3), and CONVFMT is a +built-in variable used for internal conversion from number +to string and initialized to "%.6g". Explicit type +conversions can be forced, expr "" is +string and expr+0 is numeric.

+ +

To evaluate, +expr1 rel-op expr2, if both operands +are numeric or number and string then the comparison is +numeric; if both operands are string the comparison is +string; if one operand is string, the non-string operand is +converted and the comparison is string. The result is +numeric, 1 or 0.

+ +

In boolean +contexts such as, if ( expr ) +statement, a string expression evaluates true if and +only if it is not the empty string ""; numeric +values if and only if not numerically zero.

+ +

3. Regular +expressions
+In the AWK language, records, fields and strings are often +tested for matching a regular expression. Regular +expressions are enclosed in slashes, and

+ + + + + +
+ + +

expr ~ /r/

+ +

is an AWK +expression that evaluates to 1 if expr +"matches" r, which means a substring of +expr is in the set of strings defined by r. +With no match the expression evaluates to 0; replacing ~ +with the "not match" operator, !~ , reverses the +meaning. As pattern-action pairs,

+ + + + + +
+ + +

/r/ { action } and $0 ~ /r/ +{ action }

+ +

are the same, +and for each input record that matches r, +action is executed. In fact, /r/ is an AWK +expression that is equivalent to ($0 ~ /r/) +anywhere except when on the right side of a match operator +or passed as an argument to a built-in function that expects +a regular expression argument.

+ +

AWK uses +extended regular expressions as with egrep(1). The +regular expression metacharacters, i.e., those with special +meaning in regular expressions are

+ + + + + +
+ + +

\ ^ $ . [ ] | ( ) * + ?

+ +

Regular +expressions are built up from characters as follows:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

c

+ + +

matches any non-metacharacter +c.

+ + +

\c

+ + +

matches a character defined by the same escape sequences +used in string constants or the literal character c +if \c is not an escape sequence.

+ + +

.

+ + +

matches any character (including newline).

+ + +

^

+ + +

matches the front of a string.

+ + +

$

+ + +

matches the back of a string.

+ + +

[c1c2c3...]

+ + +

matches any character in the class c1c2c3... . An +interval of characters is denoted c1-c2 inside a class +[...].

+ + +

[^c1c2c3...]

+ + +

matches any character not in the class c1c2c3...

+ +

Regular +expressions are built up from other regular expressions as +follows:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

r1r2

+ + +

matches r1 followed +immediately by r2 (concatenation).

+ + +

r1 | r2

+ + +

matches r1 or r2 (alternation).

+ + +

r*

+ + +

matches r repeated zero or more times.

+ + +

r+

+ + +

matches r repeated one or more times.

+ + +

r?

+ + +

matches r zero or once.

+ + +

(r)

+ + +

matches r, providing grouping.

+ +

The increasing +precedence of operators is alternation, concatenation and +unary (*, + or ?).

+ +

For +example,

+ + +

/^[_a-zA-Z][_a-zA-Z0-9]*$/ +and

+ + + + + +
+ + + +

/^[-+]?([0-9]+\.?|\.[0-9])[0-9]*([eE][-+]?[0-9]+)?$/

+ +

are matched by +AWK identifiers and AWK numeric constants respectively. Note +that . has to be escaped to be recognized as a decimal +point, and that metacharacters are not special inside +character classes.

+ +

Any expression +can be used on the right hand side of the ~ or !~ operators +or passed to a built-in that expects a regular expression. +If needed, it is converted to string, and then interpreted +as a regular expression. For example,

+ + + + + + + + +
+ + +

BEGIN { identifier = +"[_a-zA-Z][_a-zA-Z0-9]*" +}

+ + +

$0 ~ "^" identifier

+ +

prints all +lines that start with an AWK identifier.

+ +

lmawk +recognizes the empty regular expression, //, which matches +the empty string and hence is matched by any string at the +front, back and between every character. For example,

+ + + + + + + + +
+ + +

echo abc | lmawk ’{ gsub(//, "X") ; print }’

+ + +

XaXbXcX

+ +

4. Records +and fields
+Records are read in one at a time, and stored in the +field variable $0. The record is split into +fields which are stored in $1, $2, ..., +$NF. The built-in variable NF is set to the +number of fields, and NR and FNR are +incremented by 1. Fields above $NF are set to +"".

+ +

Assignment to +$0 causes the fields and NF to be recomputed. +Assignment to NF or to a field causes $0 to be +reconstructed by concatenating the $i’s +separated by OFS. Assignment to a field with index +greater than NF, increases NF and causes +$0 to be reconstructed.

+ +

Data input +stored in fields is string, unless the entire field has +numeric form and then the type is number and string. For +example,

+ + + + + + + + + + + +
+ + +

echo 24 24E |

+ + +

lmawk ’{ print($1>100, $1>"100", +$2>100, $2>"100") }’

+ + +

0 1 1 1

+ +

$0 and +$2 are string and $1 is number and string. The +first comparison is numeric, the second is string, the third +is string (100 is converted to "100"), and the +last is string.

+ +

5. +Expressions and operators
+The expression syntax is similar to C. Primary expressions +are numeric constants, string constants, variables, fields, +arrays and function calls. The identifier for a variable, +array or function can be a sequence of letters, digits and +underscores, that does not start with a digit. Variables are +not declared; they exist when first referenced and are +initialized to null.

+ +

New expressions +are composed with the following operators in order of +increasing precedence.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

assignment

+
+ + +

= += −= *= /= %= ^=

+ + +

conditional

+
+ + +

? :

+ + +

logical or

+
+ + +

||

+ + +

logical and

+
+ + +

&&

+ + +

array membership

+ + +

in

+ + +

matching

+ + + +

~ !~

+ + +

relational

+
+ + +

< > <= >= == !=

+ + +

concatenation

+
+ + +

(no explicit operator)

+ + +

add ops

+ +
+ + +

+ −

+ + +

mul ops

+ +
+ + +

* / %

+ + +

unary

+ +
+ + +

+ −

+ + +

logical not

+
+ + +

!

+ + +

exponentiation

+
+ + +

^

+ + +

inc and dec

+
+ + +

++ −− (both post and pre)

+ + +

field

+ +
+ + +

$

+ +

Assignment, +conditional and exponentiation associate right to left; the +other operators associate left to right. Any expression can +be parenthesized.

+ +

6. +Arrays
+Awk provides one-dimensional arrays. Array elements are +expressed as array[expr]. Expr is +internally converted to string type, so, for example, A[1] +and A["1"] are the same element and the actual +index is "1". Arrays indexed by strings are called +associative arrays. Initially an array is empty; elements +exist when first accessed. An expression, expr +in array evaluates to 1 if +array[expr] exists, else to 0.

+ +

There is a form +of the for statement that loops over each index of an +array.

+ + + + + +
+ + +

for ( var in array ) +statement

+ +

sets var +to each index of array and executes statement. +The order that var traverses the indices of +array is not defined.

+ +

The statement, +delete array[expr], causes +array[expr] not to exist. lmawk +supports an extension, delete array, which +deletes all elements of array.

+ + +

Multidimensional +arrays are synthesized with concatenation using the built-in +variable SUBSEP. +array[expr1,expr2] is equivalent to +array[expr1 SUBSEP expr2]. +Testing for a multidimensional element uses a parenthesized +index, such as

+ + + + + +
+ + +

if ( (i, j) in A ) print A[i, j]

+ +

7. +Builtin-variables
+The following variables are built-in and initialized before +program execution.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

ARGC

+ + +

number of command line +arguments.

+ + +

ARGV

+ + +

array of command line arguments, 0..ARGC-1.

+ + +

CONVFMT

+ + +

format for internal conversion of numbers to string, +initially = "%.6g".

+ + +

ENVIRON

+ + +

array indexed by environment variables. An environment +string, var=value is stored as +ENVIRON[var] = value.

+ + +

FILENAME

+ + +

name of the current input file.

+ + +

FNR

+ + +

current record number in FILENAME.

+ + +

FS

+ + +

splits records into fields as a regular expression.

+ + +

NF

+ + +

number of fields in the current record.

+ + +

NR

+ + +

current record number in the total input stream.

+ + +

OFMT

+ + +

format for printing numbers; initially = +"%.6g".

+ + +

OFS

+ + +

inserted between fields on output, initially = " +".

+ + +

ORS

+ + +

terminates each record on output, initially = +"\n".

+ + +

RLENGTH

+ + +

length set by the last call to the built-in function, +match().

+ + +

RS

+ + +

input record separator, initially = "\n".

+ + +

RSTART

+ + +

index set by the last call to match().

+ + +

SUBSEP

+ + +

used to build multiple array subscripts, initially = +"\034".

+ + +

ERRNO

+ + +

misc built-in functions (libmawk extensions) use this +variable to report error. All extension calls will set this +variable before returning, therefore ERRNO holds the result +of the last call. An empty string value means no error. +Error messages are formatted in a way that the first word is +a unique integer, followed by a human readable error +message from the second word. int(ERRNO) can be used to +acquire the error code, which then can be used as a +secondary output from the extension function. For example, +an awk program can use valueof() to determine if a global +symbol exists and is a function or a variable or anything +else.

+ + +

LIBPATH

+ + +

is a semicolon separated list of search paths. When +loading an awk script by file name (-f command line argument +or include from another awk script) these paths are inserted +before the file name, in order, one by one, until the first +path that allows opening the file. An empty path is +equivalent to the current working directory. LIBPATH can be +modified from the command line using -v, as arguments are +scanned before loading the scripts. Setting LIBPATH to empty +string results in the original behaviour of mawk. LIBPATH is +ignored for script file names starting with slash +(’/’) as those are assumed to be absolute +paths.

+ +

8. Built-in +functions
+String functions

+ +

gsub(r,s,t) +gsub(r,s)

+ +

Global substitution, every +match of regular expression r in variable t is +replaced by string s. The number of replacements is +returned. If t is omitted, $0 is used. An +& in the replacement string s is replaced by the +matched substring of t. \& and \\ put literal +& and \, respectively, in the replacement string.

+ +

index(s,t)

+ +

If t is a substring of +s, then the position where t starts is +returned, else 0 is returned. The first character of +s is in position 1.

+ +

length(s)

+ +

Returns the length of string +s.

+ +

match(s,r)

+ +

Returns the index of the first +longest match of regular expression r in string +s. Returns 0 if no match. As a side effect, +RSTART is set to the return value. RLENGTH is +set to the length of the match or −1 if no match. If +the empty string is matched, RLENGTH is set to 0, and +1 is returned if the match is at the front, and +length(s)+1 is returned if the match is at the +back.

+ +

split(s,A,r) +split(s,A)

+ +

String s is split into +fields by regular expression r and the fields are +loaded into array A. The number of fields is +returned. See section 11 below for more detail. If r +is omitted, FS is used.

+ + +

sprintf(format,expr-list)

+ +

Returns a string constructed +from expr-list according to format. See the +description of printf() below.

+ +

sub(r,s,t) +sub(r,s)

+ +

Single substitution, same as +gsub() except at most one substitution.

+ +

substr(s,i,n) +substr(s,i)

+ +

Returns the substring of string +s, starting at index i, of length n. If +n is omitted, the suffix of s, starting at +i is returned.

+ +

tolower(s)

+ +

Returns a copy of s with +all upper case characters converted to lower case.

+ +

toupper(s)

+ +

Returns a copy of s with +all lower case characters converted to upper case.

+ +

Arithmetic +functions

+ + +

atan2(y,x) +Arctan of y/x between -PI and PI.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

cos(x)

+
+ + +

Cosine function, x in radians.

+ + +

exp(x)

+
+ + +

Exponential function.

+ + +

int(x)

+
+ + +

Returns x truncated towards zero.

+ + +

log(x)

+
+ + +

Natural logarithm.

+ + +

rand()

+
+ + +

Returns a random number between zero and one.

+ + +

sin(x)

+
+ + +

Sine function, x in radians.

+ + +

sqrt(x)

+
+ + +

Returns square root of x.

+ +

srand(expr) srand()

+ +

Seeds the random number +generator, using the clock if expr is omitted, and +returns the value of the previous seed. lmawk seeds +the random number generator from the clock at startup so +there is no real need to call srand(). Srand(expr) is +useful for repeating pseudo random sequences.

+ +

Misc functions +(libmawk extensions)

+ + +

call(fname,arg1,arg2,...)

+ +

Call awk function fname +with the supplied arguments. If the call fails, an empty value +is returned; otherwise the return value of the callee is returned. Built-in +variable ERRNO is always set.

+ + +

acall(fname,arrname)

+ +

Call awk function fname +with arguments supplied in array named arrname (both +arguments are strings naming an existing object). The array +should be indexed from 1. Number of arguments is determined +by looking for the first empty (non-existing) index in the +array. If the call fails, an empty value is returned; otherwise the return value +of the callee is returned. Built-in variable ERRNO is always +set.

+ +

valueof(vname +[,idx])

+ +

Return the value of variable +vname; if the variable is an array, return the +element indexed by idx (which must be present in this +case). If index is not present or is empty (""), +the variable is expected to be scalar. Built-in variable +ERRNO is always set. NOTE: valueof() has access to the +global symbol table only. It will fail to resolve anything +other than global objects; most notably it will fail on local +variables, $ arguments and on most of the built-in +variables.

+ +

9. Input and +output
+There are two output statements, print and +printf.

+ + + + + + + + +
+ + +

print

+ + +

writes $0 ORS to standard output.

+
+ +

print expr1, +expr2, ..., exprn

+ +

writes expr1 OFS +expr2 OFS ... exprn ORS to +standard output. Numeric expressions are converted to string +with OFMT.

+ +

printf format, +expr-list

+ +

duplicates the printf C library +function writing to standard output. The complete ANSI C +format specifications are recognized with conversions %c, +%d, %e, %E, %f, %g, %G, %i, %o, %s, %u, %x, %X and %%, and +conversion qualifiers h and l.

+ +

The argument +list to print or printf can optionally be enclosed in +parentheses. Print formats numbers using OFMT or +"%d" for exact integers. "%c" with a +numeric argument prints the corresponding 8 bit character, +with a string argument it prints the first character of the +string. The output of print and printf can be redirected to +a file or command by appending > file, >> +file or | command to the end of the print +statement. Redirection opens file or command +only once, subsequent redirections append to the already +open stream. By convention, lmawk associates the +filename "/dev/stderr" with stderr which allows +print and printf to be redirected to stderr. lmawk +also associates "−" and +"/dev/stdout" with stdin and stdout which allows +these streams to be passed to functions. Opening /dev/fd/N +will do an fdopen() on file descriptor N, where N is an +integer - this is a libmawk extension. If any of the /dev +heuristics needs to be bypassed (i.e. the script wants to +open the real /dev/stdout or the real /dev/fd/5), the +leading slash should be doubled (e.g. //dev/fd/5).

+ +

The input +function getline has the following variations.

+ +

getline

+ +

reads into $0, updates +the fields, NF, NR and FNR.

+ +

getline < file

+ +

reads into $0 from +file, updates the fields and NF.

+ +

getline var

+ +

reads the next record into +var, updates NR and FNR.

+ +

getline var < +file

+ +

reads the next record of +file into var.

+ +

command | getline

+ +

pipes a record from +command into $0 and updates the fields and +NF.

+ +

command | getline +var

+ +

pipes a record from +command into var.

+ +

Getline returns +0 on end-of-file, −1 on error, otherwise 1.

+ +

Commands on the +end of pipes are executed by /bin/sh.

+ +

The function +close(expr) closes the file or pipe associated +with expr. Close returns 0 if expr is an open +file, the exit status if expr is a piped command, and +−1 otherwise. Close is used to reread a file or +command, make sure the other end of an output pipe is +finished or conserve file resources.

+ +

The function +fflush(expr) flushes the output file or pipe +associated with expr. Fflush returns 0 if expr +is an open output stream else −1. Fflush without an +argument flushes stdout. Fflush with an empty argument +("") flushes all open output.

+ +

The function +system(expr) uses /bin/sh to execute +expr and returns the exit status of the command +expr. Changes made to the ENVIRON array are +not passed to commands executed with system or +pipes.

+ +

10. User +defined functions
+The syntax for a user defined function is

+ + +

function +name( args ) { statements }

+ +

The function +body can contain a return statement

+ +

return +opt_expr

+ +

A return +statement is not required. Function calls may be nested or +recursive. Functions are passed expressions by value and +arrays by reference. Extra arguments serve as local +variables and are initialized to null. For example, +csplit(s,A) puts each character of s into +array A and returns the length of s.

+ +

function +csplit(s, A, n, i)

+ + + + + + + + + + + + + + + + + + + + + + +
+ + +

{

+ + +

n = length(s)

+ + +

for( i = 1 ; i <= n ; i++ ) A[i] = substr(s, i, +1)

+ + +

return n

+ + +

}

+ +

Putting extra +space between passed arguments and local variables is +conventional. Functions can be referenced before they are +defined, but the function name and the ’(’ of +the arguments must touch to avoid confusion with +concatenation.

+ +

11. +Splitting strings, records and files
+Awk programs use the same algorithm to split strings into +arrays with split(), and records into fields on FS. +lmawk uses essentially the same algorithm to split +files into records on RS.

+ + +

Split(expr,A,sep) +works as follows:

+ + + + + + + + + + + + + + + + + +
+ + +

(1)

+ + +

If sep is omitted, it is replaced by FS. +Sep can be an expression or regular expression. If it +is an expression of non-string type, it is converted to +string.

+ + +

(2)

+ + +

If sep = " " (a single space), then +<SPACE> is trimmed from the front and back of +expr, and sep becomes <SPACE>. +lmawk defines <SPACE> as the regular expression +/[ \t\n]+/. Otherwise sep is treated as a +regular expression, except that meta-characters are ignored +for a string of length 1, e.g., split(x, A, "*") +and split(x, A, /\*/) are the same.

+ + +

(3)

+ + +

If expr is not string, it is converted to string. +If expr is then the empty string "", +split() returns 0 and A is set empty. Otherwise, all +non-overlapping, non-null and longest matches of sep +in expr, separate expr into fields which are +loaded into A. The fields are placed in A[1], A[2], +..., A[n] and split() returns n, the number of fields which +is the number of matches plus one. Data placed in A +that looks numeric is typed number and string.

+ +

Splitting +records into fields works the same except the pieces are +loaded into $1, $2,..., $NF. If +$0 is empty, NF is set to 0 and all $i +to "".

+ +

lmawk +splits files into records by the same algorithm, but with +the slight difference that RS is really a terminator +instead of a separator. (ORS is really a terminator +too).

+ +

E.g., if +FS = ":+" and $0 = "a::b:" +, then NF = 3 and $1 = "a", +$2 = "b" and $3 = "", but +if "a::b:" is the contents of an input file and +RS = ":+", then there are two records +"a" and "b".

+ +

RS = +" " is not special.

+ +

If FS = +"", then lmawk breaks the record into +individual characters, and, similarly, +split(s,A,"") places the individual +characters of s into A.

+ +

12. +Multi-line records
+Since lmawk interprets RS as a regular +expression, multi-line records are easy. Setting RS = +"\n\n+", makes one or more blank lines separate +records. If FS = " " (the default), then +single newlines, by the rules for <SPACE> above, +become space and single newlines are field separators.

+ +

For example, if +a file is "a b\nc\n\n", RS = +"\n\n+" and FS = " ", then +there is one record "a b\nc" with three +fields "a", "b" and "c". +Changing FS = "\n", gives two fields +"a b" and "c"; changing FS = +"", gives one field identical to the record.

+ +

If you want +lines with spaces or tabs to be considered blank, set +RS = "\n([ \t]*\n)+". For +compatibility with other awks, setting RS = +"" has the same effect as if blank lines are +stripped from the front and back of files and then records +are determined as if RS = "\n\n+". Posix +requires that "\n" always separates records when +RS = "" regardless of the value of +FS. lmawk does not support this convention, +because defining "\n" as <SPACE> makes it +unnecessary.

+ +

Most of the +time when you change RS for multi-line records, you +will also want to change ORS to "\n\n" so +the record spacing is preserved on output.

+ +

13. Program +execution
+This section describes the order of program execution. First +ARGC is set to the total number of command line +arguments passed to the execution phase of the program. +ARGV[0] is set to the name of the AWK interpreter and +ARGV[1] ... ARGV[ARGC-1] holds the remaining +command line arguments exclusive of options and program +source. For example with

+ +

lmawk −f +prog v=1 A t=hello B

+ +

ARGC = 5 +with ARGV[0] = "lmawk", ARGV[1] = +"v=1", ARGV[2] = "A", +ARGV[3] = "t=hello" and ARGV[4] = +"B".

+ +

Next, each +BEGIN block is executed in order. If the program +consists entirely of BEGIN blocks, then execution +terminates, else an input stream is opened and execution +continues. If ARGC equals 1, the input stream is set +to stdin, else the command line arguments ARGV[1] ... +ARGV[ARGC-1] are examined for a file argument.

+ +

The command +line arguments divide into three sets: file arguments, +assignment arguments and empty strings "". An +assignment has the form var=string. When an +ARGV[i] is examined as a possible file argument, if +it is empty it is skipped; if it is an assignment argument, +the assignment to var takes place and i skips +to the next argument; else ARGV[i] is opened for +input. If it fails to open, execution terminates with exit +code 2. If no command line argument is a file argument, then +input comes from stdin. Getline in a BEGIN action +opens input. "−" as a file argument denotes +stdin.

+ +

Once an input +stream is open, each input record is tested against each +pattern, and if it matches, the associated +action is executed. An expression pattern matches if +it is boolean true (see the end of section 2). A +BEGIN pattern matches before any input has been read, +and an END pattern matches after all input has been +read. A range pattern, expr1,expr2 , matches +every record between the match of expr1 and the match of +expr2 inclusively.

+ +

When end of +file occurs on the input stream, the remaining command line +arguments are examined for a file argument, and if there is +one it is opened, else the END pattern is +considered matched and all END actions are +executed.

+ +

In the example, +the assignment v=1 takes place after the BEGIN +actions are executed, and the data placed in v is +typed number and string. Input is then read from file A. On +end of file A, t is set to the string "hello", and +B is opened for input. On end of file B, the END +actions are executed.

+ +

Program flow at +the pattern {action} level can be changed with +the

+ + +

next

+ + + + + +
+ + +

exit opt_expr

+ +

statements. A +next statement causes the next input record to be +read and pattern testing to restart with the first +pattern {action} pair in the program. An exit +statement causes immediate execution of the END +actions or program termination if there are none or if the +exit occurs in an END action. The +opt_expr sets the exit value of the program unless +overridden by a later exit or subsequent error.

+ +

14. +include
+libmawk introduces source inclusion feature. Syntax is:

+ + + + + +
+ + +

include "filename"

+ +

Include +statements must be on top level (outside of blocks). If file +name
+starts with a plus sign (’+’), the script file +is not loaded if it has
+been already loaded (by another include or -f command line +argument).

+ +

EXAMPLES + +

+ + +

1. emulate +cat.

+ + + + + +
+ + +

{ print }

+ +

2. emulate +wc.

+ + + + + + + + + + + + + + +
+ + +

{ chars += length($0) + 1 # add one for the \n

+ + +

words += NF

+ + +

}

+ + +

END{ print NR, words, chars }

+ +

3. count the +number of unique "real words".

+ + + + + + + + + + + + + + + + + + + + +
+ + +

BEGIN { FS = "[^A-Za-z]+" }

+ + +

{ for(i = 1 ; i <= NF ; i++) word[$i] = "" +}

+ + +

END { delete word[""]

+ + +

for ( i in word ) cnt++

+ + +

print cnt

+ + +

}

+ +

4. sum the +second field of every record based on the first field.

+ + + + + + + + + + + +
+ + +

$1 ~ /credit|gain/ { sum += $2 }

+ + +

$1 ~ /debit|loss/ { sum −= $2 }

+ + +

END { print sum }

+ +

5. sort a file, +comparing as string

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

{ line[NR] = $0 "" } # make sure of comparison +type

+
+ + + +
+ + +

# in case some lines look numeric

+
+ + +

END { isort(line, NR)

+
+ + +

for(i = 1 ; i <= NR ; i++) print line[i]

+
+ + +

}

+
+ + +

#insertion sort of A[1..n]

+
+ + + +

function isort( A, n,

+ + +

i, j, hold)

+ + +

{

+
+ + +

for( i = 2 ; i <= n ; i++)

+
+ + +

{

+
+ + +

hold = A[j = i]

+
+ + +

while ( A[j−1] > hold )

+
+ + +

{ j−− ; A[j+1] = A[j] }

+
+ + +

A[j] = hold

+
+ + +

}

+
+ + +

# sentinel A[0] = "" will be created if +needed

+
+ + +

}

+
+ +

COMPATIBILITY ISSUES + +

+ + +

The Posix +1003.2(draft 11.3) definition of the AWK language is AWK as +described in the AWK book with a few extensions that +appeared in SystemVR4 nawk. The extensions are:

+ +

New functions: +toupper() and tolower(); libmawk extensions: call(), +acall(), valueof().

+ +

New variables: +ENVIRON[] and CONVFMT; libmawk extension: ERRNO, LIBPATH. As +a libmawk extension, ENVIRON affects the environment of +children processes.

+ +

As a libmawk +extension, new built-in variable LIBPATH is used as a list +of search paths while loading scripts from the command line +or from include.

+ +

If a script +name starts with plus (’+’), the file is not +loaded if it has been loaded earlier (to avoid double +loading libs through -f and/or include). This is a libmawk +extension.

+ +

It is possible +to include a script from another script using keyword +include "scriptname.awk" (libmawk extension).

+ +

ANSI C +conversion specifications for printf() and sprintf().

+ +

New command +options: −v var=value, multiple -f options and +implementation options as arguments to −W.

+ +

Posix AWK is +oriented to operate on files a line at a time. RS can +be changed from "\n" to another single character, +but it is hard to find any use for this — there are no +examples in the AWK book. By convention, RS = +"", makes one or more blank lines separate +records, allowing multi-line records. When RS = +"", "\n" is always a field separator +regardless of the value in FS.

+ +

lmawk, +on the other hand, allows RS to be a regular +expression. When "\n" appears in records, it is +treated as space, and FS always determines +fields.

+ +

Removing the +line at a time paradigm can make some programs simpler and +can often improve performance. For example, redoing example +3 from above,

+ + + + + + + + + + + + + + + + + + + + +
+ + +

BEGIN { RS = "[^A-Za-z]+" }

+ + +

{ word[ $0 ] = "" }

+ + +

END { delete word[ "" ]

+ + +

for( i in word ) cnt++

+ + +

print cnt

+ + +

}

+ +

counts the +number of unique words by making each word a record. On +moderate size files, lmawk executes twice as fast, +because of the simplified inner loop.

+ +

The following +program replaces each comment by a single space in a C +program file,

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

BEGIN {

+
+ + +

RS = "/\*([^*]|\*+[^/*])*\*+/"

+
+ + + + +

# comment is record separator

+ + +

ORS = " "

+
+ + +

getline hold

+
+ +

}

+ +

{ print hold ; +hold = $0 }

+ +

END { printf +"%s" , hold }

+ +

Buffering one +record is needed to avoid terminating the last record with a +space.

+ +

With +lmawk, the following are all equivalent,

+ + + + + +
+ + +

x ~ /a\+b/ x ~ "a\+b" x ~ +"a\\+b"

+ +

The strings get +scanned twice, once as string and once as regular +expression. On the string scan, lmawk ignores the +escape on non-escape characters while the AWK book advocates +\c be recognized as c which necessitates the +double escaping of meta-characters in strings. Posix +explicitly declines to define the behavior which passively +forces programs that must run under a variety of awks to use +the more portable but less readable, double escape.

+ +

Posix AWK does +not recognize "/dev/std{out,err}" or \x hex escape +sequences in strings. Unlike ANSI C, lmawk limits the +number of digits that follows \x to two as the current +implementation only supports 8 bit characters. The built-in +fflush first appeared in a recent (1993) AT&T awk +released to netlib, and is not part of the posix standard. +Aggregate deletion with delete array is not +part of the posix standard.

+ +

Posix +explicitly leaves the behavior of FS = "" +undefined, and mentions splitting the record into characters +as a possible interpretation, but currently this use is not +portable across implementations.

+ +

Finally, here +is how lmawk handles exceptional cases not discussed +in the AWK book or the Posix draft. It is unsafe to assume +consistency across awks and safe to skip to the next +section.

+ +

substr(s, i, n) +returns the characters of s in the intersection of the +closed interval [1, length(s)] and the half-open interval +[i, i+n). When this intersection is empty, the empty string +is returned; so substr("ABC", 1, 0) = "" +and substr("ABC", −4, 6) = +"A".

+ +

Every string, +including the empty string, matches the empty string at the +front so, s ~ // and s ~ "", are always 1 as is +match(s, //) and match(s, ""). The last two set +RLENGTH to 0.

+ +

index(s, t) is +always the same as match(s, t1) where t1 is the same as t +with metacharacters escaped. Hence consistency with match +requires that index(s, "") always returns 1. Also +the condition, index(s,t) != 0 if and only if t is a substring +of s, requires index("","") = 1.

+ +

If getline +encounters end of file, getline var, leaves var unchanged. +Similarly, on entry to the END actions, $0, +the fields and NF have their value unaltered from the +last record.

+ +

SEE ALSO + +

+ + + +

egrep(1), +mawk(1)

+ +

Aho, Kernighan +and Weinberger, The AWK Programming Language, +Addison-Wesley Publishing, 1988, (the AWK book), defines the +language, opening with a tutorial and advancing to many +interesting programs that delve into issues of software +design and analysis relevant to programming in any +language.

+ +

The GAWK +Manual, The Free Software Foundation, 1991, is a +tutorial and language reference that does not attempt the +depth of the AWK book and assumes the reader may be a novice +programmer. The section on AWK arrays is excellent. It also +discusses Posix requirements for AWK.

+ +

BUGS + +

+ + +

lmawk +cannot handle ascii NUL \0 in the source or data files. You +can output NUL using printf with %c, and any other 8 bit +character is acceptable input.

+ +

lmawk +implements printf() and sprintf() using the C library +functions, printf and sprintf, so full ANSI compatibility +requires an ANSI C library. In practice this means the h +conversion qualifier may not be available. Also lmawk +inherits any bugs or limitations of the library +functions.

+ +

Implementors of +the AWK language have shown a consistent lack of imagination +when naming their programs.

+ +

AUTHOR + +

+ + +

mawk: +Mike Brennan (brennan@whidbey.com).

+ +

libmawk +extensions: Tibor Palinkas (libmawk@igor2.repo.hu).

+
+ + diff --git a/doc/numeric.html b/doc/numeric.html new file mode 100644 index 0000000..2fb66ac --- /dev/null +++ b/doc/numeric.html @@ -0,0 +1,59 @@ + + +

libmawk numerics

+ Mawk implemented all numeric calculations using double precision floating + point numbers. Libmawk has a compile-time option for using different + types for numerics, with the following choices available: +
    +
  • double (default) +
  • int +
+ +

handling exceptions and NaNs

+ Mawk was geared towards catching floating point errors (such as + division by zero or log(-1)) and reporting a runtime error as soon as + possible. Libmawk, targeting embedded script application, should minimize + runtime errors by providing means for the script to check for them and + recover. +

+ There are multiple approaches for handling such errors. A design decision + has been made for exclusively using NaN (Not a Number) - on platforms where it is not + implemented by the FPU or by libc, it's emulated. NaN support shall work + the same way for all available numeric types. +

+ Rules about NaN are few and simple: +

    +
  • 1. NaN should be a special value that can not be mistaken for a valid number by the script +
  • 2. math library calls (e.g. log()) should return NaN for invalid input; library calls shall accept NaN as input and return NaN as output +
  • 3. other library functions should handle NaN properly (i.e. printf %f or print shall write "nan" and a string-to-number conversion shall be able to understand "nan") +
  • 4. if any input of a calculation is NaN, the result shall be NaN (e.g. NaN+1 is NaN); such calculations never cause runtime error +
  • 5. 1/0, 0/0 and similar corner cases are all NaN - there is no inf +
  • 6. the only runtime error that may be caused by NaNs is when a conditional jump depends on a NaN (e.g. if (nan) {}) +
  • 7. isnan(x) returns 1 if x is NaN. +
+

+ In practice this means a block of numeric calculations can be done safely, + without checking any input or intermediate results. At the end of the + block the result(s) shall be checked with isnan(). As long as the results + (even indirectly) depend on an input or intermediate result that is NaN, + the result is guaranteed to be NaN too, without the risk of a runtime error. + + +

Implementation

+

type: double

+ If the system has FPE, it's disabled. If the system does not have native + NaN, a special NaN value is defined (using HUGE_VAL) and + before each operation all inputs are checked for NaN to make sure the + output is NaN too. On systems with NaN + ### TODO: inf? ### + +

type: int

+ ### TODO: ### + +

division by zero

+ Before divisions input is always checked and division by zero is + replaced by a NaN. The same mechanism is in place for all numeric types. + + + + diff --git a/doc/portability.html b/doc/portability.html new file mode 100644 index 0000000..339be4f --- /dev/null +++ b/doc/portability.html @@ -0,0 +1,76 @@ + + +

Portability with scconfig

+ + Libmawk is configured using an embedded copy of scconfig. In a distribution + tarball, it's a snapshot of scconfig; in an svn repository it is an + svn external, which guarantees libmawk is using the latest version + of scconfig. +

+ Scconfig is a self-contained project configuration tool which depends + only on Make and an ANSI (C89) C compiler. Scconfig can test different + things on a system, often by compiling and running test programs. Besides + saving test results in a text file, it can also generate text files like + Makefiles or include files with content depending on those test results. +

+ There are artificial limits on portability though. While in theory it is + possible to stretch the system so that it really works on all operating + systems that ever existed, or at least on all UNIX-like systems, it is not the + goal. This decision is a trade-off between portability and maintainability. + Also between portability and code size (or bloat). The artificial limits are: +

    +
  • a C compiler that fully supports ANSI (C89) C is required +
  • make supporting traditional Makefiles is required +
  • no efforts for systems older than 80s UNIX systems; for example libmawk depends on pipe(2), and if it is missing, some of the functionality will be missing +
  • mawk had support for DOS - this is removed, because +
      +
    • it was already marked as somewhat obsolete or at least untested in version 1.3.3, which is the base for libmawk; documentation suggested users should use an older version of mawk on DOS +
    • since DOS poses drastic limitation on memory, and mawk seemed to be a memory hog, it is hard to believe someone would actually write an application that would share memory with libmawk and still do something useful - and after all, the purpose of libmawk is exactly embedding awk in applications +
    • too many #ifdefs scattered the code for supporting DOS +
    +
+

+ Libmawk 0.9.7 is reported to configure and compile on the + following systems: + +
system compiles +
out of +
the box
FPE problems awklib
test
binary save/load
save/load +
[i386] GNU/Linux yes no ok ok +
[amd64] GNU/Linux yes no ok ok +
[i386] Minix +
[i386] NetBSD 4.0 yes no ok ok +
[i386] open + solaris 5.11 +
[i386] OpenBSD 5.0 yes no ok ok +
[i386] FreeBSD 9.0 yes no ok ok +
[IP22] IRIX 5.3 no minor doesn't work +
[i386] Dragonfly + BSD 2.10 yes no ok ok + +
+ + +

+ Libmawk 0.9.6 is reported to configure and compile on the + following systems: + +
system compiles +
out of +
the box
FPE problems awklib
test
binary save/load
save/load +
i386 GNU/Linux yes no ok ok +
amd64 GNU/Linux yes no ok ok +
i386 Minix yes yes ok ok +
i386 NetBSD 4.0 yes yes ok ok +
i386 open + solaris 5.11 yes no ok ok +
i386 OpenBSD 5.0 yes yes ok ok +
i386 FreeBSD 9.0 yes yes ok ok +
IP22 IRIX 5.3 yes no ok doesn't work +
i386 Dragonfly + BSD 2.10 yes yes ok ok + +
+ + + diff --git a/doc/semi-gnu.html b/doc/semi-gnu.html new file mode 100644 index 0000000..989c147 --- /dev/null +++ b/doc/semi-gnu.html @@ -0,0 +1,60 @@ + + +

Semi-dependency on GNU utils

+Some of the features depend on GNU-specific implementation of standard +software. These are all semi-dependencies, which means if a user doesn't +have GNU software on a system, it is still possible for him/her to compile +libmawk. Unfortunately editing some parts of the source on such a system +would not result in the desired updates. Obviously a main goal is to +minimize this sort of dependency, but sometimes it is very hard or impossible +to avoid. This document describes these dependencies explaining the tradeoff +and the ways around. +

1. Bison

+

1.1. Rationale

+ There are four GNU-specific features in use: +
    +
  • %pure-parser for getting a reentrant parser +
  • %parse-param to pass mawk state around to avoid global variables +
  • %lex-param for the same reason as %parse-param +
  • --name-prefix to avoid namespace pollution +
+ The first three features are critical for having a reentrant/thread safe + libmawk. Without those three, there could be only one mawk context in + an application, or at least parsing more than one script at the same + time would leave both contexts mangled. +

+ The fourth feature, --name-prefix helps avoiding namespace pollution - + the application may have its own parsers with or without name prefixing, + and libmawk shouldn't collide with those. + +

1.2. effects, restrictions

+ Scconfig detects presence of bison; in case bison is not installed, + Makefile.conf is generated in a way that bison is never run. Output + of bison is included in the source tree (parse.c/parse.h). +

+ Not having bison on a system means editing parse.y will not + update the actual parser code in parse.c. + +

1.3. how to bypass

+ It is possible to use traditional yacc for compiling parse.y. Besides + editing Makefile.conf and parse.y for removing those 4 features, + scan.c/scan.h should be edited too, because arguments for the scanner + depend on these settings. A global variable for the mawk context for + the script currently being parsed should be introduced. Actions should + be made to ensure no concurrent loading of scripts is possible. + +

2. Makefile.dep needs gcc

+

2.1. Rationale

+ Source file dependencies are generated using gcc -MM and are + stored in Makefile.dep shipped with the source package. + +

2.2. effects, restrictions

+ After changing #include lines in the source, running make depend + will not update Makefile.dep but will zap it. + +

2.3. how to bypass

+ It is easily possible to keep Makefile.dep in sync by hand. Another option + is to ignore Makefile.dep and run make clean before compilation. + + + diff --git a/scconfig/Makefile b/scconfig/Makefile new file mode 100644 index 0000000..a59c677 --- /dev/null +++ b/scconfig/Makefile @@ -0,0 +1,55 @@ +# --- configuration part -- +SRC=src/ +BIN=src/ + +# - generic configuration - +# what cflags to use to compile scconfig +USER_CFLAGS = -g -DGENCALL + +# what ldflags to use to link scconfig +USER_LDFLAGS = + +# in case hooks.c needs to link to something local +USER_OBJS = + +# what to build - a ./configure +all: configure + +# This line imports scconfig core and default tests +include src/default/Makefile.plugin + +# +# - PLUGINS - +# +# Comment this line if you are not interested in c99 features +#include src/c99/Makefile.plugin + +# Comment this line if you do not need script libs to be detected +#include src/scripts/Makefile.plugin + +# Comment this line if you do not need parser libs to be detected +#include src/parser/Makefile.plugin + +# Comment this line if you do not need to detect parser generators +include src/parsgen/Makefile.plugin + +# Comment this line if you do not need math related libs +include src/math/Makefile.plugin + +# Comment this line if you do not need socket/networking +#include src/socket/Makefile.plugin + +# Comment this line if you do not need tmpasm (templating) +include src/tmpasm/Makefile.plugin + +# --- you shouldn't edit the lines below --- +OBJS = $(USER_OBJS) hooks.o $(DEFAULT_OBJS) $(SCRIPT_OBJS) $(PARSER_OBJS) $(TMPASM_OBJS) $(C99_OBJS) $(PARSGEN_OBJS) $(MATH_OBJS) $(SOCKET_OBJS) +CFLAGS = $(USER_CFLAGS) $(DEFAULT_CFLAGS) $(SCRIPT_CFLAGS) $(PARSER_CFLAGS) $(TMPASM_CFLAGS) $(C99_CFLAGS) $(PARSGEN_CFLAGS) $(MATH_CFLAGS) $(SOCKET_CFLAGS) -Isrc/default +LDFLAGS = $(USER_LDFLAGS) $(DEFAULT_LDFLAGS) $(SCRIPT_LDFLAGS) $(PARSER_LDFLAGS) $(TMPASM_LDFLAGS) $(C99_LDFLAGS) $(PARSGEN_LDFLAGS) $(MATH_LDFLAGS) $(SOCKET_LDFLAGS) + +configure: 
$(OBJS) + $(CC) -o configure $(OBJS) + +clean: + -rm $(OBJS) configure + diff --git a/scconfig/hooks.c b/scconfig/hooks.c new file mode 100644 index 0000000..9e24d00 --- /dev/null +++ b/scconfig/hooks.c @@ -0,0 +1,204 @@ +#include +#include "arg.h" +#include "log.h" +#include "dep.h" +#include "db.h" +#include "tmpasm_scconfig.h" + +#define VER1 "1" +#define VER2 "0" +#define VER3 "0" + +static void help(void) +{ + printf("./configure: configure libmawk.\n"); + printf("\n"); + printf("Usage: ./configure [options]\n"); + printf("\n"); + printf("options are:\n"); + printf(" --prefix=path change installation prefix from /usr/local to path\n"); + printf(" --debug build full debug version (-g -O0, extra asserts)\n"); + printf(" --profile build profiling version if available (-pg)\n"); + printf(" --symbols include symbols (add -g, but no -O0 or extra asserts)\n"); + printf(" --numeric=int change the internal numeric type (default is double)\n"); + printf("\n"); +} + +/* Runs when a custom command line argument is found + returns true if no furhter argument processing should be done */ +int hook_custom_arg(const char *key, const char *value) +{ + if (strcmp(key, "prefix") == 0) { + put("/local/prefix", value); + return 1; + } + if (strcmp(key, "debug") == 0) { + put("/local/debug", strue); + return 1; + } + if (strcmp(key, "symbols") == 0) { + put("/local/symbols", strue); + return 1; + } + if (strcmp(key, "profile") == 0) { + put("/local/profile", strue); + return 1; + } + if (strcmp(key, "numeric") == 0) { + if ((strcmp(value, "int") == 0) || (strcmp(value, "double") == 0)) + put("/local/numeric", value); + else { + fprintf(stderr, "Error: invalid numeric format. 
Must be int or double\n"); + exit(1); + } + return 1; + } + if (strcmp(key, "help") == 0) { + help(); + exit(0); + } + + return 0; +} + + +/* Runs before anything else */ +int hook_preinit() +{ + return 0; +} + +/* Runs after initialization */ +int hook_postinit() +{ + db_mkdir("/local"); + + /* defaults */ + put("/local/prefix", "/usr/local"); + put("/local/debug", sfalse); + put("/local/symbols", sfalse); + put("/local/profile", sfalse); + + report("Configuring libmawk.\n"); + logprintf(0, "Configuring libmawk.\n"); + return 0; +} + +/* Runs after all arguments are read and parsed */ +int hook_postarg() +{ + if (get("/local/numeric") == NULL) + put("/local/numeric", "double"); + return 0; +} + +/* Runs when things should be detected for the host system */ +int hook_detect_host() +{ + return 0; +} + +/* Runs when things should be detected for the target system */ +int hook_detect_target() +{ + put("/local/version", VER1 "." VER2 "." VER3); + put("/local/version/1", VER1); + put("/local/version/2", VER2); + put("/local/version/3", VER3); + + /* if there was no custom requirement from the command line, run all requirements in non-fatal mode */ + if (num_custom_reqs < 1) { + if (istrue(get("/local/debug"))) { + require("cc/argstd/pedantic", 0, 0); + require("cc/argstd/ansi", 0, 0); + require("cc/argstd/Wall", 0, 0); + append("/target/cc/cflags", " -O0 "); + append("/target/cc/cflags", get("cc/argstd/ansi")); + append("/target/cc/cflags", " "); + append("/target/cc/cflags", get("cc/argstd/pedantic")); + append("/target/cc/cflags", " "); + append("/target/cc/cflags", get("cc/argstd/Wall")); + } + else + append("/target/cc/cflags", " -O3 "); + if (istrue(get("/local/debug")) || istrue(get("/local/symbols"))) + append("/target/cc/cflags", " -g "); + if (istrue(get("/local/profile"))) { + require("cc/argstd/pg", 0, 0); + require("cc/argstd/no-pie", 0, 0); + append("/target/cc/cflags", " "); + append("/target/cc/cflags", get("cc/argstd/pg")); + append("/target/cc/cflags", 
" "); + append("/target/cc/cflags", get("cc/argstd/no-pie")); + } + + + require("cc/cc", 0, 1); + require("cc/fpic", 0, 1); + require("cc/soname", 0, 0); + require("cc/rdynamic", 0, 0); + require("fstools/chmodx", 0, 1); + require("fstools/cp", 0, 1); + require("fstools/rm", 0, 1); + require("fstools/ln", 0, 1); + require("fstools/mkdir", 0, 1); + require("sys/types/size_t/includes", 0, 0); + require("libs/fs/realpath/presents", 0, 0); + require("libs/env/putenv", 0, 1); + require("libs/io/pipe/presents", 0, 0); + require("libs/math/cc/log/*", 0, 1); + require("libs/math/nan/*", 0, 0); + require("libs/math/isnan/*", 0, 0); + require("libs/math/nanop/*", 0, 0); + + require("parsgen/bison", 0, 0); + printf("Numeric format: %s\n", get("/local/numeric")); + } + return 0; +} + +/* Runs after detection hooks, should generate the output (Makefiles, etc.) */ +int hook_generate() +{ + printf("Generating libmawk/Makefile... "); + fflush(stdout); + if (tmpasm("../src/libmawk", "Makefile.conf.in", "Makefile.conf") == 0) + printf("OK\n"); + else + printf("failed\n"); + + printf("Generating libmawk/conf.h... "); + fflush(stdout); + if (tmpasm("../src/libmawk", "conf.h.in", "conf.h") == 0) + printf("OK\n"); + else + printf("failed\n"); + + printf("Generating awklib/Makefile... "); + fflush(stdout); + if (tmpasm("../src/awklib", "Makefile.in", "Makefile") == 0) + printf("OK\n"); + else + printf("failed\n"); + + printf("Generating awklib/regression/Makefile... 
"); + fflush(stdout); + db_mkdir("/local"); + if (tmpasm("../src/awklib/regression", "Makefile.in", "Makefile") == 0) + printf("OK\n"); + else + printf("failed\n"); + + return 0; +} + +/* Runs before everything is uninitialized */ +void hook_preuninit() +{ +} + +/* Runs at the very end, when everything is already uninitialized */ +void hook_postuninit() +{ +} + diff --git a/scconfig/src/default/Makefile.plugin b/scconfig/src/default/Makefile.plugin new file mode 100644 index 0000000..3b33700 --- /dev/null +++ b/scconfig/src/default/Makefile.plugin @@ -0,0 +1,141 @@ +DEFAULT_NOMAIN_OBJS = \ + $(BIN)/default/find_cc.o \ + $(BIN)/default/lib_compile.o \ + $(BIN)/default/lib_uniqinc.o \ + $(BIN)/default/lib_file.o \ + $(BIN)/default/lib_try.o \ + $(BIN)/default/str.o \ + $(BIN)/default/ht.o \ + $(BIN)/default/log.o \ + $(BIN)/default/arg.o \ + $(BIN)/default/db.o \ + $(BIN)/default/dep.o \ + $(BIN)/default/deps_default.o \ + $(BIN)/default/find_libs.o \ + $(BIN)/default/find_fscalls.o \ + $(BIN)/default/find_printf.o \ + $(BIN)/default/find_proc.o \ + $(BIN)/default/find_fstools.o \ + $(BIN)/default/find_uname.o \ + $(BIN)/default/find_target.o \ + $(BIN)/default/find_thread.o \ + $(BIN)/default/find_io.o \ + $(BIN)/default/find_time.o \ + $(BIN)/default/find_types.o \ + $(BIN)/default/find_signal.o \ + $(BIN)/default/find_environ.o \ + $(BIN)/default/regex.o \ + $(BIN)/default/lib_filelist.o \ + $(BIN)/default/lib_srctree.o \ + $(BIN)/default/lib_pkg_config.o \ + $(BIN)/default/find_str.o \ + $(BIN)/default/find_sys.o + +DEFAULT_MAIN_OBJS = \ + $(BIN)/default/main.o \ + $(BIN)/default/main_custom_args.o \ + $(BIN)/default/main_lib.o + +DEFAULT_OBJS = $(DEFAULT_NOMAIN_OBJS) $(DEFAULT_MAIN_OBJS) + +$(BIN)/default/lib_compile.o: $(SRC)/default/lib_compile.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/lib_compile.c -o $(BIN)/default/lib_compile.o + +$(BIN)/default/lib_file.o: $(SRC)/default/lib_file.c $(SRC)/default/dep.h 
$(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/lib_file.c -o $(BIN)/default/lib_file.o + +$(BIN)/default/lib_try.o: $(SRC)/default/lib_try.c $(SRC)/default/log.h $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/lib_try.c -o $(BIN)/default/lib_try.o + +$(BIN)/default/str.o: $(SRC)/default/str.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/str.c -o $(BIN)/default/str.o + +$(BIN)/default/ht.o: $(SRC)/default/ht.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/ht.c -o $(BIN)/default/ht.o + +$(BIN)/default/log.o: $(SRC)/default/log.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/log.c -o $(BIN)/default/log.o + +$(BIN)/default/arg.o: $(SRC)/default/arg.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/arg.c -o $(BIN)/default/arg.o + +$(BIN)/default/db.o: $(SRC)/default/db.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/db.c -o $(BIN)/default/db.o + +$(BIN)/default/dep.o: $(SRC)/default/dep.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/dep.c -o $(BIN)/default/dep.o + +$(BIN)/default/deps_default.o: $(SRC)/default/deps_default.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/deps_default.c -o $(BIN)/default/deps_default.o + +$(BIN)/default/find_libs.o: $(SRC)/default/find_libs.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_libs.c -o $(BIN)/default/find_libs.o + +$(BIN)/default/find_fscalls.o: $(SRC)/default/find_fscalls.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_fscalls.c -o $(BIN)/default/find_fscalls.o + +$(BIN)/default/find_signal.o: $(SRC)/default/find_signal.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_signal.c -o $(BIN)/default/find_signal.o + +$(BIN)/default/find_printf.o: 
$(SRC)/default/find_printf.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_printf.c -o $(BIN)/default/find_printf.o + +$(BIN)/default/find_proc.o: $(SRC)/default/find_proc.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_proc.c -o $(BIN)/default/find_proc.o + +$(BIN)/default/find_fstools.o: $(SRC)/default/find_fstools.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_fstools.c -o $(BIN)/default/find_fstools.o + +$(BIN)/default/find_uname.o: $(SRC)/default/find_uname.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_uname.c -o $(BIN)/default/find_uname.o + +$(BIN)/default/find_target.o: $(SRC)/default/find_target.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_target.c -o $(BIN)/default/find_target.o + +$(BIN)/default/regex.o: $(SRC)/default/regex.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/regex.c -o $(BIN)/default/regex.o + +$(BIN)/default/lib_filelist.o: $(SRC)/default/lib_filelist.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/lib_filelist.c -o $(BIN)/default/lib_filelist.o + +$(BIN)/default/lib_srctree.o: $(SRC)/default/lib_srctree.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/lib_srctree.c -o $(BIN)/default/lib_srctree.o + +$(BIN)/default/lib_pkg_config.o: $(SRC)/default/lib_pkg_config.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/lib_pkg_config.c -o $(BIN)/default/lib_pkg_config.o + +$(BIN)/default/lib_uniqinc.o: $(SRC)/default/lib_uniqinc.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/lib_uniqinc.c -o $(BIN)/default/lib_uniqinc.o + +$(BIN)/default/find_sys.o: $(SRC)/default/find_sys.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_sys.c -o $(BIN)/default/find_sys.o + 
+$(BIN)/default/find_str.o: $(SRC)/default/find_str.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_str.c -o $(BIN)/default/find_str.o + +$(BIN)/default/find_cc.o: $(SRC)/default/find_cc.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_cc.c -o $(BIN)/default/find_cc.o + +$(BIN)/default/find_environ.o: $(SRC)/default/find_environ.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_environ.c -o $(BIN)/default/find_environ.o + +$(BIN)/default/find_io.o: $(SRC)/default/find_io.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_io.c -o $(BIN)/default/find_io.o + +$(BIN)/default/find_time.o: $(SRC)/default/find_time.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_time.c -o $(BIN)/default/find_time.o + +$(BIN)/default/find_types.o: $(SRC)/default/find_types.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_types.c -o $(BIN)/default/find_types.o + +$(BIN)/default/main.o: $(SRC)/default/main.c $(SRC)/default/dep.h $(SRC)/default/libs.h Makefile + $(CC) $(CFLAGS) -c $(SRC)/default/main.c -o $(BIN)/default/main.o + +$(BIN)/default/main_custom_args.o: $(SRC)/default/main_custom_args.c + $(CC) $(CFLAGS) -c $(SRC)/default/main_custom_args.c -o $(BIN)/default/main_custom_args.o + +$(BIN)/default/main_lib.o: $(SRC)/default/main_lib.c + $(CC) $(CFLAGS) -c $(SRC)/default/main_lib.c -o $(BIN)/default/main_lib.o + +$(BIN)/default/find_thread.o: $(SRC)/default/find_thread.c $(SRC)/default/dep.h $(SRC)/default/libs.h + $(CC) $(CFLAGS) -c $(SRC)/default/find_thread.c -o $(BIN)/default/find_thread.o diff --git a/scconfig/src/default/arg.c b/scconfig/src/default/arg.c new file mode 100644 index 0000000..a50f48a --- /dev/null +++ b/scconfig/src/default/arg.c @@ -0,0 +1,106 @@ +/* + scconfig - command line argument processing + Copyright (C) 2009..2012 Tibor Palinkas + + This 
library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include "db.h" +#include "arg.h" +#include "dep.h" +#include "log.h" +#include "libs.h" + +argtbl_t main_argument_table[] = { + {"import", NULL, import_args, "Import saved config (sub)tree"}, + {"target", "/arg/sys/target", NULL, "set cross compilation target (prefix)"}, + {"target-name", "/arg/sys/target-name", NULL, "set cross compilation target (system name)"}, + {"target-shell","/arg/sys/target-shell",NULL, "set the shell on cross compilation target"}, + {"emu", "/arg/sys/emu", NULL, "emulator for testing cross compiled executables with"}, + {"pkg-config", "/arg/sys/pkg-config", NULL, "path to pkg-config to use"}, + {"pkg-config-zap","/arg/sys/pkg-config-zap",NULL, "ignore pkg-config results by this regex pattern"}, + +/* wildcard rules for icl() control */ + {"^ldflags/", NULL, import_icl, NULL}, + {"^cflags/", NULL, import_icl, NULL}, + {"^includes/", NULL, import_icl, NULL}, + {"^prefix/", NULL, import_icl, NULL}, + +/* the followings are autoconf compatibility translations */ + {"CC", "/arg/cc/cc", NULL, "Force using a C compiler (command line)"}, + {"CFLAGS", "/arg/cc/cflags", NULL, "Force using a CFLAGS for C compilation"}, + 
{"LDFLAGS", "/arg/cc/ldflags", NULL, "Force using a LDFLAGS for linking"}, + {"LDL", "/arg/libs/ldl", NULL, "Force using -ldl string"}, + + {"gpmi-prefix", "/arg/gpmi/prefix", NULL, NULL}, + {NULL, NULL, NULL, NULL} +}; + +void process_args(int argc, char *argv[]) +{ + int n; + char *key, *value; + argtbl_t *a; + int found, tainted = 0; + + db_mkdir("/arg"); + + logprintf(0, "CLI arg 0: '%s'\n", argv[0]); + for(n = 1; n < argc; n++) { + key = argv[n]; + logprintf(0, "CLI arg %d: '%s'\n", n, key); + while(*key == '-') key++; + value = str_chr(key, '='); + found = 0; + if (value != NULL) { + *value = '\0'; + value++; + /* Look in the argument translate table */ + for(a = main_argument_table; a->arg != NULL; a++) { + if (((a->arg[0] == '^') && (strncmp(a->arg+1, key, strlen(a->arg+1)) == 0)) || (strcmp(a->arg, key) == 0)) { + found = 1; + if (a->callback != NULL) { + if (a->callback(key, value) != 0) { + error("Processing argument '%s' failed in the callback\n", argv[n]); + abort(); + } + } + if (a->path != NULL) + put(a->path, value); + } + } + /* Look in known deps table or /arg */ + if (found == 0) { + if ((is_dep_known(key)) || (strncmp(key, "/arg/", 5) == 0)) { + put(key, value); + found = 1; + } + } + } + if (found == 0) { + if (custom_arg(key, value) == 0) { + error("Unknown argument '%s'\n", key); + tainted++; + } + } + } + if (tainted) + exit(1); +} diff --git a/scconfig/src/default/arg.h b/scconfig/src/default/arg.h new file mode 100644 index 0000000..f94eb31 --- /dev/null +++ b/scconfig/src/default/arg.h @@ -0,0 +1,22 @@ +#ifndef SCC_ARG_H +#define SCC_ARG_H + +typedef struct { + char *arg; + char *path; + int (*callback)(const char *key, const char *value); + char *help; +} argtbl_t; + +extern argtbl_t main_argument_table[]; + + + +void process_args(int argc, char *argv[]); + + +/* main.c: */ +extern int custom_arg(const char *key, const char *value); +extern int num_custom_reqs; + +#endif diff --git a/scconfig/src/default/db.c b/scconfig/src/default/db.c 
/* Append value to the string stored at database node key.
    - an empty value is a no-op;
    - if the node does not exist yet, it is created holding value;
    - otherwise the node is replaced by the concatenation old+value.
   key and value must not be NULL (asserted). Aborts on allocation failure. */
void append(const char *key, const char *value)
{
	const char *orig;
	char *new;
	size_t l1, l2;

	assert(key != NULL);
	assert(value != NULL);
	if (*value == '\0')
		return;

	orig = get(key);
	if (orig == NULL) {
		put(key, value);
		return;
	}

	l1 = strlen(orig);
	l2 = strlen(value);
	new = malloc(l1 + l2 + 1);
	if (new == NULL) {
		error("append: out of memory while extending '%s'\n", key);
		abort();
	}
	memcpy(new, orig, l1);
	memcpy(new + l1, value, l2);
	new[l1+l2] = '\0';
	put(key, new);

	/* put() keeps its own copy of the value (import() in this file relies on
	   that too: it frees or reuses its buffer right after put()), so the
	   temporary must be released here or it leaks on every append */
	free(new);
}
fld_len); + first_level_dir[fld_len] = '\0'; + *ht = ht_get(DBs, first_level_dir+1); + if (fld != NULL) + *fld = first_level_dir; + return path; +} + +static void export_qs(FILE *f, const char *s) +{ + fputc('"', f); + for(;*s != '\0';s++) { + switch(*s) { + case '"': fputc('\\', f); fputc('"', f); break; + case '\n': fputc('\\', f); fputc('n', f); break; + case '\r': fputc('\\', f); fputc('r', f); break; + case '\t': fputc('\\', f); fputc('t', f); break; + default: fputc(*s, f); + } + } + + fputc('"', f); + fputc('\n', f); +} + +static int needs_quote(const char *s) { + for(; *s != '\0'; s++) + if ((*s < 32) || (*s > 126) || (*s == '"')) return 1; + return 0; +} + +int export_(FILE *f, int export_empty, ht_t *table, const char *fld) +{ + ht_entry_t *h; + + for(h = ht_first(table); h != NULL; h = ht_next(table, h)) + if (export_empty || ((h->value != NULL) && (*(char *)h->value != '\0'))) { + fprintf(f, "/%s/%s=", fld, h->key); + if (h->value != NULL) { + if (needs_quote((char *)h->value)) + export_qs(f, (const char *)h->value); + else + fprintf(f, "%s\n", (char *)h->value); + } + else + fprintf(f, "\n"); + } + return 0; +} + +int export(const char *fn, int export_empty, const char *root) +{ + FILE *f; + int ret; +/* ht_t *table; */ + ht_entry_t *h; + + if (fn != NULL) { + f = fopen(fn, "w"); + if (f == NULL) + return -1; + } + else + f = stdout; + + if ((root == NULL) || ((root[0] == '/') && (root[1] == '\0'))) { + /* export all directories */ + for(h = ht_first(DBs); h != NULL; h = ht_next(DBs, h)) + ret += export_(f, export_empty, h->value, h->key); + } + else { + error("not yet implemented\n"); + abort(); + /* db_split_path(); */ + } + + if (f != stdout) + fclose(f); + + return ret; +} + +/* append a single character, grow the buffer as needed */ +#define qappend(chr) \ +do { \ + if (used >= alloced) { \ + alloced += 256; \ + res = realloc(res, alloced); \ + } \ + res[used] = chr; \ + used++; \ +} while(0) + +/* read until end of quote and interpret backslash 
/* Read a quoted string that may span multiple lines.
    f:         file to read further chunks from
    str:       buffer holding the text right after the opening quote; it is
               reused for the following chunks
    strmax:    size limit passed to fgets() for str
    quote:     the closing quote character to look for
    do_esc:    when non-zero, backslash sequences (\\ \n \r \t, anything else
               is taken literally) are interpreted
    num_lines: incremented for every newline consumed
    fn:        file name, used in warnings only
   Returns a newly allocated string the caller has to free(). If the file ends
   before the closing quote is found, a warning is printed and the text
   collected so far is returned (the original code ignored the fgets() result
   and kept re-scanning the stale buffer forever). */
static char *readq(FILE *f, char *str, long strmax, int quote, int do_esc, int *num_lines, const char *fn)
{
	int bs = 0;
	long used = 0, alloced = 0; /* used by the qappend() macro */
	char *res = NULL, *s;

	for(;;) {
		for(s = str; *s != '\0'; s++) {
			if (*s == '\n') (*num_lines)++;

			if (bs) { /* character escaped by backslash */
				switch(*s) {
					case '\\': qappend('\\'); break;
					case 'n': qappend('\n'); break;
					case 'r': qappend('\r'); break;
					case 't': qappend('\t'); break;
					default: qappend(*s); break;
				}
				bs = 0;
			}
			else if (*s == quote) { /* end */
				qappend('\0');
				if ((s[1] != '\r') && (s[1] != '\n') && (s[1] != '\0'))
					fprintf(stderr, "Warning: trailing text after quote ignored in %s:%d\n", fn, (*num_lines)+1);
				return res;
			}
			else if (do_esc && (*s == '\\')) bs = 1; /* backslash start */
			else qappend(*s); /* plain character */
		}

		/* get the next chunk; on EOF or read error str is left untouched, so
		   looping on would never terminate */
		if (fgets(str, strmax, f) == NULL) {
			fprintf(stderr, "Warning: unterminated quote at end of file in %s:%d\n", fn, (*num_lines)+1);
			qappend('\0');
			return res;
		}
	}

	return NULL; /* can't get here */
}
importing: no directory name for %s.\n", key); + abort(); + } + *slash = '\0'; + db_mkdir(key); + *slash = '/'; + put(key, value); + logprintf(0, "(Import from '%s': '%s'='%s')\n", fn, key, value); + if (quote) + free(value); + } + } + + fclose(f); + return num_records; +} + +int import_args(const char *key, const char *fn) +{ + (void) key; /* suppress compiler warnings for unused key; needed because function pointers to this function from arg.c */ + db_mkdir("/target"); + db_mkdir("/host"); + + return import(fn) < 0; +} + + +static const char *db_get(const char *key) +{ + const char *path; + ht_t *ht; + + path = db_split_path(key, &ht, NULL); + if (ht == NULL) + return NULL; + return ht_get(ht, path); +} + +static const char *db_put(const char *key, const char *value) +{ + const char *path; + ht_t *ht; + + path = db_split_path(key, &ht, NULL); + if (ht == NULL) { + error("db_put: can't find top level hash for '%s'\n", key); + abort(); + } + return ht_set(ht, path, (void *)value); +} + +#define assamble_path \ + assert(strlen(key) + strlen(db_cwd) < sizeof(tmp)-1); \ + sprintf(tmp, "%s/%s", db_cwd, key); + +const char *get(const char *key) +{ + char tmp[256]; + + if (*key == '/') + return db_get(key); + assamble_path; + return db_get(tmp); +} + +const char *put(const char *key, const char *value) +{ + char tmp[256]; + + if (*key == '/') + return db_put(key, value); + assamble_path; + return db_put(tmp, value); +} + +void db_init(void) +{ + DBs = ht_resize(ht_alloc(0), 16); +} + +void db_uninit(void) +{ + ht_entry_t *h; + ht_t *dir; + + for(h = ht_first(DBs); h != NULL; h = ht_next(DBs, h)) { + dir = h->value; + dir->refcount--; + if (dir->refcount == 0) + ht_free(dir); + } + ht_free(DBs); + if (db_cwd != NULL) + free(db_cwd); +/* Just in case someone calls db_init again... 
/* Build a space separated list: the optional prefix followed by the values
   of the listed database nodes. The variable argument list holds node names
   (const char *) and must be terminated by NULL; nodes that do not exist are
   skipped. Returns a newly allocated string (caller frees); it is "" when
   prefix is NULL and no node has a value. Aborts on allocation failure.

   Fixes over the previous version: no write before the buffer when nothing
   was concatenated, no overflow for a prefix longer than the initial buffer,
   and the growth check accounts for the separator and the terminator. */
char *concat_nodes(const char *prefix, ...)
{
	char *buff, *grown;
	const char *node, *value;
	size_t allocated, len, totallen = 0;
	va_list ap;

	if (prefix != NULL)
		totallen = strlen(prefix) + 1; /* prefix plus its separator */

	allocated = totallen + 256;
	buff = malloc(allocated);
	if (buff == NULL) {
		error("concat_nodes: out of memory\n");
		abort();
	}

	if (prefix != NULL) {
		memcpy(buff, prefix, totallen - 1);
		buff[totallen - 1] = ' ';
	}

	va_start(ap, prefix);
	while((node = va_arg(ap, const char *)) != NULL) {
		value = get(node);
		if (value == NULL)
			continue;

		len = strlen(value);
		/* room is needed for the value, one separator and the final '\0' */
		if (totallen + len + 2 > allocated) {
			allocated = totallen + len + 256;
			grown = realloc(buff, allocated);
			if (grown == NULL) {
				free(buff);
				error("concat_nodes: out of memory\n");
				abort();
			}
			buff = grown;
		}
		memcpy(buff + totallen, value, len);
		totallen += len;
		buff[totallen] = ' ';
		totallen++;
	}
	va_end(ap);

	/* the last separator, if there is any, becomes the terminator */
	if (totallen > 0)
		totallen--;
	buff[totallen] = '\0';
	return buff;
}
"unknown" */ + +/* accessors */ +const char *get(const char *key); +const char *put(const char *key, const char *data); +void append(const char *key, const char *value); +char *concat_nodes(const char *prefix, ...); +int node_istrue(const char *key); + + +/* init/uninit */ +void db_init(void); +void db_uninit(void); + +/* export/import */ +int export(const char *fn, int export_empty, const char *root); +int import(const char *fn); +int import_args(const char *key, const char *fn); + +/* file system features */ +extern char *db_cwd; +void db_cd(const char *path); +void db_mkdir(const char *path); +void db_link(const char *existing, const char *new); +void db_rmdir(const char *path); + +extern ht_t *DBs; +#define iscross (ht_get(DBs, "target") != ht_get(DBs, "host")) +#define in_cross_target (iscross && (strcmp(db_cwd, "/target") == 0)) +#define in_cross_host (iscross && (strcmp(db_cwd, "/host") == 0)) diff --git a/scconfig/src/default/dep.c b/scconfig/src/default/dep.c new file mode 100644 index 0000000..f1028a8 --- /dev/null +++ b/scconfig/src/default/dep.c @@ -0,0 +1,227 @@ +/* + scconfig - dependencies + Copyright (C) 2009 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include "dep.h" +#include "db.h" +#include "log.h" +#include "libs.h" + +typedef struct { + int (*fn)(const char *name, int logdepth, int fatal); +} fn_wrap_t; + + +static ht_t *deps = NULL; + + +/* find name_ and decide if it was a wildcard request; + NOTE: there are requests and servers, both can be wildcard independently. + - if a request ends with a / *, it is an explicit wildcard request (*wild=1) + - if a request names a "directory" that is wildcard-server, that's a wildcard request (*wild=1) + - else the request is a normal request (*wild=0). + For normal requests, a required node was explicitly named; if that node is + not created by the detection function, that's a failure. For wildcard + requests we don't look for any specific node to be created. 
+ TODO: we may still check if at least the directory is created + */ +fn_wrap_t *get_wrap(const char *name_, int *wild, int *missing) +{ + fn_wrap_t *w; + char *name, *sep; + int len; + + len = strlen(name_); + *wild = name_[len-1] == '*'; + + if (*wild) { + char *pres; + pres = malloc(len+16); + memcpy(pres, name_, len-1); + strcpy(pres+len-1, "presents"); + *missing = get(pres) == NULL; + if (*missing) { /* if there's no /presents, it may be a non-directory node with an actual non-empty string value */ + const char *val; + pres[len-2] = '\0'; + val = get(pres); + if (val != NULL) + *missing = !strlen(val); + } + free(pres); + if (!(*missing)) /* already detected, won't be detected again */ + return NULL; + } + *missing = 1; + + /* try full match first */ + w = ht_get(deps, name_); + if (w != NULL) + return w; + + + /* try substituting the last part of the path with * for wildcard matches */ + name = malloc(len+3); /* worst case: ends in a / and we need to append *\0; allocate a bit more */ + memcpy(name, name_, len+1); /* make a copy we can modify */ + if (name[len-1] != '/') { + name[len] = '/'; /* append a / - if name_ was a "directory", this will result in name/ * */ + name[len+1] = '\0'; + } + + *wild = 1; /* if we append a / *, then it's a wildcard request */ + for(;;) { + sep = str_rchr(name, '/'); + if (sep == NULL) + goto error; + sep[1] = '*'; + sep[2] = '\0'; + w = ht_get(deps, name); + if (w != NULL) { + free(name); + return w; + } + *sep = '\0'; + *wild = 0; /* cutting back the second layer - not wildcard request anymore, but a request to a specific node served by a wildcard */ + } + + /* no match, exit with error */ + error:; + *wild = 0; + free(name); + return NULL; +} + + +int require(const char *name, int logdepth, int fatal) +{ + fn_wrap_t *w; + int wild, missing; + + if (get(name) == NULL) { + w = get_wrap(name, &wild, &missing); + if (!missing) + return 0; + if ((w == NULL) || (w->fn == NULL)) { + error("Node %s is required but I don't know how 
/* Almost 'basename': returns the part of path after its last '/', which may
   be "*" or an empty string. Returns "" when path is NULL or contains no '/'.
   The result points into path (or to a string literal), nothing is
   allocated. */
const char *det_list_target(const char *path)
{
	const char *last_sep = (path != NULL) ? strrchr(path, '/') : NULL;

	return (last_sep != NULL) ? last_sep + 1 : "";
}
a/scconfig/src/default/dep.h b/scconfig/src/default/dep.h new file mode 100644 index 0000000..4df9063 --- /dev/null +++ b/scconfig/src/default/dep.h @@ -0,0 +1,24 @@ +#include "ht.h" + +int is_dep_known(const char *name); +int require(const char *name, int logdepth, int fatal); +const char *dep_add(const char *name, int (*finder)(const char *name, int logdepth, int fatal)); +void require_all(int fatal); + +/* Returns if dependency is a wildcard one (ending in / *) */ +int is_dep_wild(const char *path); + +/* Almost 'basename': returns the last portion of the path, which may + be '*'. Returns "" on error. */ +const char *det_list_target(const char *path); + +/* Returns 1 if the user asked for detecting a feature; needtodo is + the first argument passed to the detection function (the target the caller + wants to get detected), cando is the output path of the test that the + detector could do next. */ +int asked_for(const char *cando, const char *needtodo); + +/* for internal use */ +void dep_init(void); +void dep_uninit(void); + diff --git a/scconfig/src/default/deps_default.c b/scconfig/src/default/deps_default.c new file mode 100644 index 0000000..da8c503 --- /dev/null +++ b/scconfig/src/default/deps_default.c @@ -0,0 +1,167 @@ +/* + scconfig - dependency list of default tests + Copyright (C) 2009..2012 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include "dep.h" +#include "find.h" + +void deps_default_init(void) +{ + dep_add("cc/cc", find_cc); + dep_add("cc/argstd/*", find_cc_argstd); + dep_add("cc/cflags", find_cc); + dep_add("cc/ldflags", find_cc); + dep_add("cc/inline", find_inline); + dep_add("cc/varargmacro", find_varargmacro); + dep_add("cc/funcmacro", find_funcmacro); + dep_add("cc/constructor", find_constructor); + dep_add("cc/destructor", find_destructor); + dep_add("cc/rdynamic", find_rdynamic); + dep_add("cc/soname", find_soname); + dep_add("cc/wlrpath", find_wlrpath); + dep_add("cc/wloutimplib", find_cc_wloutimplib); + dep_add("cc/wloutputdef", find_cc_wloutputdef); + dep_add("cc/fpic", find_fpic); + dep_add("cc/fpie/*", find_cc_fpie); + dep_add("cc/fnopie/*", find_cc_fnopie); + dep_add("cc/fnopic/*", find_cc_fnopic); + dep_add("cc/alloca/*", find_alloca); + dep_add("cc/_exit/*", find__exit); + dep_add("cc/ldflags_dynlib", find_ldflags_dynlib); + dep_add("cc/ldflags_dll", find_ldflags_dll); + dep_add("cc/ldflags_so", find_ldflags_so); + dep_add("cc/func_attr/unused/*", find_fattr_unused); + dep_add("cc/declspec/dllimport/*", find_declspec_dllimport); + dep_add("cc/declspec/dllexport/*", find_declspec_dllexport); + dep_add("cc/argmachine/*", find_cc_argmachine); + dep_add("libs/ldl", find_lib_ldl); + dep_add("libs/LoadLibrary/*", find_lib_LoadLibrary); + dep_add("libs/lpthread", find_lib_lpthread); + dep_add("libs/lpthread-recursive", find_lib_lpthread); + dep_add("thread/semget/*", find_thread_semget); + dep_add("thread/pthread_create/*", find_thread_pthread_create); + dep_add("thread/CreateSemaphore/*", find_thread_CreateSemaphore); + dep_add("thread/CreateThread/*", 
find_thread_CreateThread); + dep_add("libs/errno/*", find_lib_errno); + dep_add("libs/printf_x", find_printf_x); + dep_add("libs/printf_ptrcast", find_printf_ptrcast); + dep_add("libs/snprintf", find_snprintf); + dep_add("libs/snprintf_safe", find_snprintf); + dep_add("libs/dprintf", find_dprintf); + dep_add("libs/vdprintf", find_vdprintf); + dep_add("libs/vsnprintf", find_vsnprintf); + dep_add("libs/proc/_spawnvp/*", find_proc__spawnvp); + dep_add("libs/proc/fork/*", find_proc_fork); + dep_add("libs/proc/wait/*", find_proc_wait); + dep_add("libs/fs/realpath/*", find_fs_realpath); + dep_add("libs/fs/_fullpath/*", find_fs__fullpath); + dep_add("libs/fs/readdir/*", find_fs_readdir); + dep_add("libs/fs/findnextfile/*", find_fs_findnextfile); + dep_add("libs/fs/stat/macros/*", find_fs_stat_macros); + dep_add("libs/fs/stat/fields/*", find_fs_stat_fields); + dep_add("libs/fs/access/*", find_fs_access); + dep_add("libs/fs/access/macros/*", find_fs_access_macros); + dep_add("libs/fs/lstat/*", find_fs_lstat); + dep_add("libs/fs/statlstat/*", find_fs_statlstat); + dep_add("libs/fs/getcwd/*", find_fs_getcwd); + dep_add("libs/fs/_getcwd/*", find_fs__getcwd); + dep_add("libs/fs/getwd/*", find_fs_getwd); + dep_add("libs/fs/mkdir/*", find_fs_mkdir); + dep_add("libs/fs/_mkdir/*", find_fs__mkdir); + dep_add("libs/fs/mkdtemp/*", find_fs_mkdtemp); + dep_add("libs/fs/mmap/*", find_fs_mmap); + dep_add("libs/fsmount/next_dev/*", find_fsmount_next_dev); + dep_add("libs/fsmount/struct_fsstat/*",find_fsmount_fsstat_fields); + dep_add("libs/fsmount/struct_statfs/*",find_fsmount_statfs_fields); + dep_add("libs/fsmount/struct_statvfs/*",find_fsmount_statvfs_fields); + dep_add("libs/fs/ustat/*", find_fs_ustat); + dep_add("libs/fs/statfs/*", find_fs_statfs); + dep_add("libs/fs/statvfs/*", find_fs_statvfs); + dep_add("libs/fs/flock/*", find_fs_flock); + + dep_add("libs/io/pipe/*", find_io_pipe); + dep_add("libs/io/dup2/*", find_io_dup2); + dep_add("libs/io/fileno/*", find_io_fileno); + 
dep_add("libs/io/lseek/*", find_io_lseek); + dep_add("libs/io/popen/*", find_io_popen); + dep_add("libs/time/usleep/*", find_time_usleep); + dep_add("libs/types/stdint/*", find_types_stdint); + dep_add("sys/types/size/*", find_types_sizes); + dep_add("libs/time/Sleep/*", find_time_Sleep); + dep_add("libs/time/gettimeofday/*", find_time_gettimeofday); + dep_add("libs/time/ftime/*", find_time_ftime); + dep_add("libs/time/timegm/*", find_time_timegm); + dep_add("libs/time/_mkgmtime/*", find_time_mkgmtime); + dep_add("libs/time/gmtime_r/*", find_time_gmtime_r); + dep_add("libs/time/gmtime_s/*", find_time_gmtime_s); + dep_add("libs/env/main_3arg/*", find_main_arg3); + dep_add("libs/env/putenv/*", find_putenv); + dep_add("libs/env/setenv/*", find_setenv); + dep_add("libs/env/environ/*", find_environ); + dep_add("signal/raise/*", find_signal_raise); + dep_add("signal/names/*", find_signal_names); + dep_add("fstools/cp", find_fstools_cp); + dep_add("fstools/ln", find_fstools_ln); + dep_add("fstools/mv", find_fstools_mv); + dep_add("fstools/rm", find_fstools_rm); + dep_add("fstools/mkdir", find_fstools_mkdir); + dep_add("fstools/ar", find_fstools_ar); + dep_add("fstools/ranlib", find_fstools_ranlib); + dep_add("fstools/awk", find_fstools_awk); + dep_add("fstools/cat", find_fstools_cat); + dep_add("fstools/sed", find_fstools_sed); + dep_add("fstools/file_l", find_fstools_file_l); + dep_add("fstools/file", find_fstools_file); + dep_add("fstools/chmodx", find_fstools_chmodx); + dep_add("sys/name", find_uname); + dep_add("sys/uname", find_uname); + dep_add("sys/triplet", find_triplet); + dep_add("sys/sysid", find_sysid); + dep_add("sys/shell", find_shell); + dep_add("sys/shell_needs_quote", find_shell); + dep_add("sys/tmp", find_tmp); + dep_add("sys/shell_eats_backslash", find_tmp); + dep_add("sys/ext_exe", find_uname); + dep_add("sys/ext_dynlib", find_uname); + dep_add("sys/ext_dynlib_native", find_uname); + dep_add("sys/ext_stalib", find_uname); + dep_add("sys/class", 
find_uname); + dep_add("sys/path_sep", find_uname); + dep_add("sys/ptrwidth", find_sys_ptrwidth); + dep_add("sys/byte_order", find_sys_byte_order); + dep_add("sys/types/size_t/*", find_types_size_t); + dep_add("sys/types/off_t/*", find_types_off_t); + dep_add("sys/types/off64_t/*", find_types_off64_t); + dep_add("sys/types/gid_t/*", find_types_gid_t); + dep_add("sys/types/uid_t/*", find_types_uid_t); + dep_add("sys/types/pid_t/*", find_types_pid_t); + dep_add("sys/types/mode_t/*", find_types_mode_t); + dep_add("sys/types/nlink_t/*", find_types_nlink_t); + dep_add("sys/types/ptrdiff_t/*", find_types_ptrdiff_t); + dep_add("sys/types/dev_t/*", find_types_dev_t); + dep_add("sys/types/ino_t/*", find_types_ino_t); + dep_add("sys/types/void_ptr/*", find_types_void_ptr); + dep_add("str/strcasecmp/*", find_strcasecmp); + dep_add("str/strncasecmp/*", find_strncasecmp); + + dep_add("/internal/filelist/cmd", find_filelist); + dep_add("/internal/filelist/method", find_filelist); +} diff --git a/scconfig/src/default/deps_default.h b/scconfig/src/default/deps_default.h new file mode 100644 index 0000000..77ebf1f --- /dev/null +++ b/scconfig/src/default/deps_default.h @@ -0,0 +1 @@ +void deps_default_init(void); diff --git a/scconfig/src/default/find.h b/scconfig/src/default/find.h new file mode 100644 index 0000000..4314e81 --- /dev/null +++ b/scconfig/src/default/find.h @@ -0,0 +1,159 @@ +/* cc */ +int find_cc(const char *name, int logdepth, int fatal); +int find_cc_argstd(const char *name, int logdepth, int fatal); +int find_cc_argmachine(const char *name, int logdepth, int fatal); +int find_cc_fpie(const char *name, int logdepth, int fatal); +int find_cc_fnopie(const char *name, int logdepth, int fatal); +int find_cc_fnopic(const char *name, int logdepth, int fatal); +int find_inline(const char *name, int logdepth, int fatal); +int find_varargmacro(const char *name, int logdepth, int fatal); +int find_funcmacro(const char *name, int logdepth, int fatal); +int 
find_constructor(const char *name, int logdepth, int fatal); +int find_destructor(const char *name, int logdepth, int fatal); +int find_fattr_unused(const char *name, int logdepth, int fatal); +int find_declspec_dllimport(const char *name, int logdepth, int fatal); +int find_declspec_dllexport(const char *name, int logdepth, int fatal); +int find_rdynamic(const char *name, int logdepth, int fatal); +int find_soname(const char *name, int logdepth, int fatal); +int find_wlrpath(const char *name, int logdepth, int fatal); +int find_cc_wloutimplib(const char *name, int logdepth, int fatal); +int find_cc_wloutputdef(const char *name, int logdepth, int fatal); +int find_fpic(const char *name, int logdepth, int fatal); +int find_ldflags_dynlib(const char *name, int logdepth, int fatal); +int find_ldflags_dll(const char *name, int logdepth, int fatal); +int find_ldflags_so(const char *name, int logdepth, int fatal); +int find_alloca(const char *name, int logdepth, int fatal); +int find__exit(const char *name, int logdepth, int fatal); + +/* libs */ +int find_lib_ldl(const char *name, int logdepth, int fatal); +int find_lib_LoadLibrary(const char *name, int logdepth, int fatal); +int find_lib_errno(const char *name, int logdepth, int fatal); + +/* thread */ +int find_lib_lpthread(const char *name, int logdepth, int fatal); +int find_thread_semget(const char *name, int logdepth, int fatal); +int find_thread_pthread_create(const char *name, int logdepth, int fatal); +int find_thread_CreateSemaphore(const char *name, int logdepth, int fatal); +int find_thread_CreateThread(const char *name, int logdepth, int fatal); + +/* fscalls */ +int find_fs_realpath(const char *name, int logdepth, int fatal); +int find_fs__fullpath(const char *name, int logdepth, int fatal); +int find_fs_readdir(const char *name, int logdepth, int fatal); +int find_fs_findnextfile(const char *name, int logdepth, int fatal); +int find_fs_access(const char *name, int logdepth, int fatal); +int 
find_fs_access_macros(const char *name, int logdepth, int fatal); +int find_fs_stat_macros(const char *name, int logdepth, int fatal); +int find_fs_stat_fields(const char *name, int logdepth, int fatal); +int find_fs_lstat(const char *name, int logdepth, int fatal); +int find_fs_statlstat(const char *name, int logdepth, int fatal); +int find_fs_getcwd(const char *name, int logdepth, int fatal); +int find_fs__getcwd(const char *name, int logdepth, int fatal); +int find_fs_getwd(const char *name, int logdepth, int fatal); +int find_fs_mkdir(const char *name, int logdepth, int fatal); +int find_fs__mkdir(const char *name, int logdepth, int fatal); +int find_fs_mkdtemp(const char *name, int logdepth, int fatal); +int find_fs_mmap(const char *name, int logdepth, int fatal); +int find_fsmount_next_dev(const char *name, int logdepth, int fatal); +int find_fsmount_fsstat_fields(const char *name, int logdepth, int fatal); +int find_fsmount_statfs_fields(const char *name, int logdepth, int fatal); +int find_fsmount_statvfs_fields(const char *name, int logdepth, int fatal); +int find_fs_ustat(const char *name, int logdepth, int fatal); +int find_fs_statfs(const char *name, int logdepth, int fatal); +int find_fs_statvfs(const char *name, int logdepth, int fatal); +int find_fs_flock(const char *name, int logdepth, int fatal); + +/* printf */ +int find_printf_x(const char *name, int logdepth, int fatal); +int find_printf_ptrcast(const char *name, int logdepth, int fatal); +int find_snprintf(const char *name, int logdepth, int fatal); +int find_dprintf(const char *name, int logdepth, int fatal); +int find_vdprintf(const char *name, int logdepth, int fatal); +int find_vsnprintf(const char *name, int logdepth, int fatal); + +/* proc */ +int find_proc__spawnvp(const char *name, int logdepth, int fatal); +int find_proc_fork(const char *name, int logdepth, int fatal); +int find_proc_wait(const char *name, int logdepth, int fatal); + +/* fstools */ +int find_fstools_cp(const char 
*name, int logdepth, int fatal); +int find_fstools_ln(const char *name, int logdepth, int fatal); +int find_fstools_mv(const char *name, int logdepth, int fatal); +int find_fstools_rm(const char *name, int logdepth, int fatal); +int find_fstools_mkdir(const char *name, int logdepth, int fatal); +int find_fstools_ar(const char *name, int logdepth, int fatal); +int find_fstools_ranlib(const char *name, int logdepth, int fatal); +int find_fstools_awk(const char *name, int logdepth, int fatal); +int find_fstools_cat(const char *name, int logdepth, int fatal); +int find_fstools_sed(const char *name, int logdepth, int fatal); +int find_fstools_file(const char *name, int logdepth, int fatal); +int find_fstools_file_l(const char *name, int logdepth, int fatal); +int find_fstools_chmodx(const char *name, int logdepth, int fatal); + +/* uname */ +int find_uname(const char *name, int logdepth, int fatal); +int find_triplet(const char *name, int logdepth, int fatal); +int find_sysid(const char *name, int logdepth, int fatal); + +/* find_target */ +int find_target(const char *name, int logdepth, int fatal); + +/* filelist */ +int find_filelist(const char *name, int logdepth, int fatal); + +/* find_str.c */ +int find_strcasecmp(const char *name, int logdepth, int fatal); +int find_strncasecmp(const char *name, int logdepth, int fatal); + +/* find_sys.c */ +int find_sys_ptrwidth(const char *name, int logdepth, int fatal); +int find_sys_byte_order(const char *name, int logdepth, int fatal); +int find_tmp(const char *name, int logdepth, int fatal); +int find_shell(const char *name, int logdepth, int fatal); + +/* find_io.c */ +int find_io_pipe(const char *name, int logdepth, int fatal); +int find_io_dup2(const char *name, int logdepth, int fatal); +int find_io_fileno(const char *name, int logdepth, int fatal); +int find_io_lseek(const char *name, int logdepth, int fatal); +int find_io_popen(const char *name, int logdepth, int fatal); + +/* find_time.c */ +int find_time_usleep(const 
char *name, int logdepth, int fatal); +int find_time_Sleep(const char *name, int logdepth, int fatal); +int find_time_gettimeofday(const char *name, int logdepth, int fatal); +int find_time_ftime(const char *name, int logdepth, int fatal); +int find_time_timegm(const char *name, int logdepth, int fatal); +int find_time_mkgmtime(const char *name, int logdepth, int fatal); +int find_time_gmtime_s(const char *name, int logdepth, int fatal); +int find_time_gmtime_r(const char *name, int logdepth, int fatal); + +/* find_types.c */ +int find_types_stdint(const char *name, int logdepth, int fatal); +int find_types_sizes(const char *name, int logdepth, int fatal); +int find_types_size_t(const char *name, int logdepth, int fatal); +int find_types_off_t(const char *name, int logdepth, int fatal); +int find_types_off64_t(const char *name, int logdepth, int fatal); +int find_types_gid_t(const char *name, int logdepth, int fatal); +int find_types_uid_t(const char *name, int logdepth, int fatal); +int find_types_pid_t(const char *name, int logdepth, int fatal); +int find_types_dev_t(const char *name, int logdepth, int fatal); +int find_types_dev_t(const char *name, int logdepth, int fatal); +int find_types_mode_t(const char *name, int logdepth, int fatal); +int find_types_nlink_t(const char *name, int logdepth, int fatal); +int find_types_ptrdiff_t(const char *name, int logdepth, int fatal); +int find_types_dev_t(const char *name, int logdepth, int fatal); +int find_types_ino_t(const char *name, int logdepth, int fatal); +int find_types_void_ptr(const char *name, int logdepth, int fatal); + +/* find_signal.c */ +int find_signal_names(const char *name, int logdepth, int fatal); +int find_signal_raise(const char *name, int logdepth, int fatal); + +/* environ.c */ +int find_main_arg3(const char *name, int logdepth, int fatal); +int find_putenv(const char *name, int logdepth, int fatal); +int find_setenv(const char *name, int logdepth, int fatal); +int find_environ(const char *name, 
int logdepth, int fatal);
diff --git a/scconfig/src/default/find_cc.c b/scconfig/src/default/find_cc.c
new file mode 100644
index 0000000..6403459
--- /dev/null
+++ b/scconfig/src/default/find_cc.c
@@ -0,0 +1,1058 @@
+/*
+    scconfig - detection of cc and compiler features
+    Copyright (C) 2009..2012 Tibor Palinkas
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+    Project page: http://repo.hu/projects/scconfig
+    Contact via email: scconfig [at] igor2.repo.hu
+*/
+
+#include 
+#include 
+#include 
+#include 
+#include "libs.h"
+#include "log.h"
+#include "db.h"
+#include "dep.h"
+
+
+
+/* Compile and run test_c with the given cc, cflags and ldflags (passed on to
+   compile_run() unchanged, NULL included). Returns 1 if that succeeds and the
+   output starts with expected, or there is no output while iscross is set;
+   returns 0 otherwise. */
+static int try_flags(int logdepth, const char *cc, const char *test_c, const char *cflags, const char *ldflags, const char *expected)
+{
+	char *out = NULL;
+
+	logprintf(logdepth, "trying cc:try_flags with cc='%s' cflags='%s' ldflags='%s'\n", (cc == NULL ? get("cc/cc") : cc), cflags == NULL ? "" : cflags, ldflags == NULL ? "" : ldflags);
+
+	if (compile_run(logdepth+1, test_c, cc, cflags, ldflags, &out) == 0) {
+		/* a NULL out is accepted only while iscross is set; never hand NULL to strncmp() */
+		if (((out == NULL) && (iscross)) || ((out != NULL) && (strncmp(out, expected, strlen(expected)) == 0))) {
+			free(out);
+			return 1;
+		}
+		free(out);
+	}
+	return 0;
+}
+
+/* Inverse of try_flags(): returns 1 if compile_run() succeeds and the output
+   does not start with expected_bad, or there is no output while iscross is
+   set; returns 0 otherwise. */
+static int try_flags_inv(int logdepth, const char *cc, const char *test_c, const char *cflags, const char *ldflags, const char *expected_bad)
+{
+	char *out = NULL;
+
+	logprintf(logdepth, "trying cc:try_flags_inv with cc='%s' cflags='%s' ldflags='%s'\n", (cc == NULL ? get("cc/cc") : cc), cflags == NULL ? "" : cflags, ldflags == NULL ? "" : ldflags);
+
+	if (compile_run(logdepth+1, test_c, cc, cflags, ldflags, &out) == 0) {
+		/* without output and without iscross there is nothing to compare: report failure */
+		if (((out == NULL) && (iscross)) || ((out != NULL) && (strncmp(out, expected_bad, strlen(expected_bad)) != 0))) {
+			free(out);
+			return 1;
+		}
+		free(out);
+	}
+	return 0;
+}
+
+/* try_flags() with both cflags and ldflags left NULL */
+static int try(int logdepth, const char *cc, const char *test_c, const char *expected)
+{
+	return try_flags(logdepth, cc, test_c, NULL, NULL, expected);
+}
+
+
+/* Test a compiler candidate: compile and run test_c expecting "OK" and, on
+   success, store the candidate as cc/cc. A NULL candidate yields 0. */
+static int trycc(int logdepth, const char *cc, const char *test_c)
+{
+	int ret;
+
+	if (cc == NULL)
+		return 0;
+
+	ret = try(logdepth, cc, test_c, "OK");
+	if (ret)
+		put("cc/cc", cc);
+	return ret;
+}
+
+/* Detect the C compiler and its basic flags. User supplied /arg/cc/cflags,
+   /arg/cc/ldflags and /arg/cc/cc are copied to cc/cflags, cc/ldflags and
+   cc/cc. Without a user supplied cc: if sys/target is set, <target>-gcc and
+   <target>-cc are tried, else $CC, gcc and cc. Without user supplied cflags
+   -pipe is probed. Returns 0 on success, 1 if no usable compiler was found
+   or the final test with all flags fails. */
+int find_cc(const char *name, int logdepth, int fatal)
+{
+	char *test_c = "#include \nint main() { printf(\"OK\\n\");\nreturn 0;}\n";
+	char *out = NULL, *targetcc;
+	const char *cc, *cflags, *ldflags, *target, *sys;
+	int len;
+
+	require("sys/name", logdepth, fatal);
+
+	sys = istarget(db_cwd) ? "target" : "host";
+	report("Checking for cc (%s)... ", sys);
+	logprintf(logdepth, "find_cc: trying to find cc (%s)...\n", sys);
+	logdepth++;
+
+	/* cflags */
+	cflags = get("/arg/cc/cflags");
+	if (cflags != NULL) {
+		logprintf(logdepth+1, "using user supplied cflags '%s'\n", cflags);
+		put("cc/cflags", cflags);
+	}
+
+	/* ldflags */
+	ldflags = get("/arg/cc/ldflags");
+	if (ldflags != NULL) {
+		logprintf(logdepth+1, "using user supplied ldflags '%s'\n", ldflags);
+		put("cc/ldflags", ldflags);
+	}
+
+	cc = get("/arg/cc/cc");
+	if (cc == NULL) {
+		target = get("sys/target");
+		if (target != NULL) {
+			logprintf(logdepth+1, "find_cc: crosscompiling for '%s', looking for target cc\n", target);
+			len = strlen(target);
+			targetcc = malloc(len + 8);
+			memcpy(targetcc, target, len);
+			strcpy(targetcc + len, "-gcc");
+			if (!trycc(logdepth+1, targetcc, test_c)) {
+				strcpy(targetcc + len, "-cc");
+				if (!trycc(logdepth+1, targetcc, test_c)) {
+					report("FAILED: failed to find crosscompiler for target '%s'\n", target);
+					logprintf(logdepth, "find_cc: FAILED to find a crosscompiler for target '%s'\n", target);
+					free(targetcc); /* both trycc() calls failed, so nothing was stored from this buffer */
+					return 1;
+				}
+			}
+			/* NOTE(review): targetcc is not freed on this path; confirm that put() copies its value before adding a free() */
+			put("cc/cc", targetcc);
+		}
+		else {
+			cc = getenv("CC");
+			logprintf(logdepth, "find_cc: Detecting cc (host)\n");
+			/* Find a working cc (no arguments) */
+			if (!(((cc != NULL) && (trycc(logdepth+1, cc, test_c))) || trycc(logdepth+1, "gcc", test_c) || trycc(logdepth+1, "cc", test_c))) {
+				report("FAILED to find a compiler\n");
+				logprintf(logdepth, "find_cc: FAILED to find a compiler\n");
+				return 1;
+			}
+		}
+	}
+	else {
+		put("cc/cc", cc);
+		logprintf(logdepth+1, "using user supplied '%s' (will test later)\n", cc);
+	}
+
+	/* cflags (again) */
+	if (cflags == NULL) {
+		logprintf(logdepth, "find_cc: Detecting -pipe\n");
+
+		if (compile_run(logdepth+1, test_c, NULL, "-pipe", "", &out) == 0) {
+			if (target_emu_fail(out) || (strncmp(out, "OK", 2) == 0)) {
+				append("cc/cflags", " -pipe");
+			}
+			free(out);
+			out = NULL; /* the final test frees out again if it is non-NULL; avoid a double free should compile_run() leave it untouched */
+		}
+	}
+	if (get("cc/cflags") == NULL)
+		put("cc/cflags", "");
+
+	/* ldflags (again) */
+	if
(get("cc/ldflags") == NULL) + put("cc/ldflags", ""); + + /* Final test of all arguments together */ + logprintf(logdepth, "find_cc: final test on cc and all flags \n"); + if (compile_run(logdepth+1, test_c, NULL, NULL, NULL, &out) != 0) { + report("FAILED to get the compiler and all flags to work together\n"); + logprintf(logdepth, "find_cc: the compiler and all the flags don't work well together, aborting\n"); + if (out != NULL) + free(out); + return 1; + } + + report("OK ('%s', '%s', '%s')\n", get("cc/cc"), get("cc/cflags"), get("cc/ldflags")); + logprintf(logdepth, "find_cc: conclusion: cc='%s' cflags='%s' ldflags='%s'\n", get("cc/cc"), get("cc/cflags"), get("cc/ldflags")); + if (out != NULL) + free(out); + return 0; +} + +int find_cc_argstd(const char *det_name, int logdepth, int fatal) +{ + char *test_c = "#include \nint main() { printf(\"OK\\n\");\nreturn 0;}\n"; + char *out = NULL; + char **flg, *flags[] = {"-ansi", "-pedantic", "-Wall", "-std=c89", "-std=c99", "-Werror", "-Wextra", "-W", "-pg", "-no-pie", "-static-pie", NULL}; + const char *det_target = det_list_target(det_name); + + require("cc/cc", logdepth, fatal); + + logprintf(logdepth, "find_cc: Detecting CC args %s\n", det_target); + report("Checking for cc args for std %s... 
", det_target); + + for(flg = flags; *flg != NULL; flg++) { + char name[128], *end; + const char *found = ""; + sprintf(name, "cc/argstd/%s", (*flg)+1); + end = strchr(name, '='); + if (end != NULL) + *end = '_'; + if (!asked_for(name, det_name)) + continue; + if (compile_run(logdepth+1, test_c, NULL, *flg, "", &out) == 0) { + if (target_emu_fail(out) || (strncmp(out, "OK", 2) == 0)) { + found = *flg; + report(" "); + report(found); + } + free(out); + } + put(name, found); + } + + if (is_dep_wild(det_name)) + put("cc/argstd/presents", strue); /* to avoid re-detection*/ + + report("\n"); + return 0; +} + +int find_cc_argmachine(const char *name, int logdepth, int fatal) +{ +#define ARGM(flag) "-m" #flag , "-mno-" #flag + const char *test_c = "#include \nint main() { printf(\"OK\\n\");\nreturn 0;}\n"; + char *out = NULL; + const char **flg, *flags[] = { ARGM(mmx), ARGM(sse), ARGM(sse2), ARGM(sse3), ARGM(ssse3), ARGM(sse4), ARGM(sse4.1), ARGM(sse4.2), ARGM(avx), ARGM(avx2), NULL}; + + require("cc/cc", logdepth, fatal); + + logprintf(logdepth, "find_cc: Detecting CC machine args\n"); + report("Checking for cc args for machine... 
"); + + for(flg = flags; *flg != NULL; flg++) { + char name[128], *end; + const char *found = ""; + { + const char* ptr = (*flg) + 1; + strcpy(name, "cc/argmachine/"); + end = name + strlen(name); + while(*ptr) { + if('.'!=*ptr && '-'!=*ptr) *end++ = *ptr; + ++ptr; + } + *end = '\0'; + } + end = strchr(name, '='); + if (end != NULL) + *end = '_'; + if (compile_run(logdepth+1, test_c, NULL, *flg, "", &out) == 0) { + if (target_emu_fail(out) || (strncmp(out, "OK", 2) == 0)) { + found = *flg; + report(" "); + report(found); + } + free(out); + } + put(name, found); + } + + report("\n"); + return 0; +#undef ARGM +} + +int find_inline(const char *name, int logdepth, int fatal) +{ + const char *test_c = + NL "#include " + NL "static inline void test_inl()" + NL "{" + NL " puts(\"OK\");" + NL "}" + NL "int main() {" + NL " test_inl();" + NL " return 0;" + NL "}" + NL ; + require("cc/cc", logdepth, fatal); + + report("Checking for inline... "); + logprintf(logdepth, "find_inline: trying to find inline...\n"); + logdepth++; + if (try(logdepth, NULL, test_c, "OK")) { + put("cc/inline", strue); + report("Found.\n"); + return 0; + } + put("cc/inline", sfalse); + report("Not found.\n"); + return 1; +} + +int find_varargmacro(const char *name, int logdepth, int fatal) +{ + const char *test_c = + NL "#include " + NL "#define pr(fmt, x...) {printf(\"PR \"); printf(fmt, x); }" + NL "int main() {" + NL " pr(\"%d %d %s\", 42, 8192, \"test\");" + NL " puts(\"\");" + NL " return 0;" + NL "}" + NL ; + require("cc/cc", logdepth, fatal); + + report("Checking for vararg macro... 
"); + logprintf(logdepth, "find_varargmacro: trying to find vararg macro...\n"); + logdepth++; + if (try(logdepth, NULL, test_c, "PR 42 8192 test")) { + put("cc/varargmacro", strue); + report("Found.\n"); + return 0; + } + put("cc/varargmacro", sfalse); + report("Not found.\n"); + return 1; +} + +int find_funcmacro(const char *name, int logdepth, int fatal) +{ + const char *test_c = + NL "#include " + NL "int main() {" + NL " printf(\"%s\\n\", __func__);" + NL " return 0;" + NL "}" + NL ; + require("cc/cc", logdepth, fatal); + + report("Checking for __func__ macro... "); + logprintf(logdepth, "find_funcmacro: trying to find __func__ macro...\n"); + logdepth++; + if (try(logdepth, NULL, test_c, "main")) { + put("cc/funcmacro", strue); + report("Found.\n"); + return 0; + } + put("cc/funcmacro", sfalse); + report("Not found.\n"); + return 1; +} + +int find_constructor(const char *name, int logdepth, int fatal) +{ + const char *test_c = + NL "#include " + NL "void startup() __attribute__ ((constructor));" + NL "void startup()" + NL "{" + NL " puts(\"OK\");" + NL "}" + NL "int main() {" + NL " return 0;" + NL "}" + NL ; + + require("cc/cc", logdepth, fatal); + + report("Checking for constructor... "); + logprintf(logdepth, "find_constructor: trying to find constructor...\n"); + logdepth++; + if (try(logdepth, NULL, test_c, "OK")) { + put("cc/constructor", strue); + report("Found.\n"); + return 0; + } + put("cc/constructor", sfalse); + report("Not found.\n"); + return 1; +} + +int find_destructor(const char *name, int logdepth, int fatal) +{ + const char *test_c = + NL "#include " + NL "void startup() __attribute__ ((destructor));" + NL "void startup()" + NL "{" + NL " puts(\"OK\");" + NL "}" + NL "int main() {" + NL " return 0;" + NL "}" + NL ; + + require("cc/cc", logdepth, fatal); + + report("Checking for destructor... 
"); + logprintf(logdepth, "find_destructor: trying to find destructor...\n"); + logdepth++; + if (try(logdepth, NULL, test_c, "OK")) { + put("cc/destructor", strue); + report("Found.\n"); + return 0; + } + put("cc/destructor", sfalse); + report("Not found.\n"); + return 1; +} + +static int test_fattr(const char *name, int logdepth, int fatal, const char *fattr) +{ + char path[64]; + char test_c[256]; + const char *test_c_tmp = + NL "#include " + NL "static void test1() __attribute__ ((%s));" + NL "static void test1()" + NL "{" + NL " puts(\"OK\");" + NL "}" + NL "int main() {" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL ; + + require("cc/cc", logdepth, fatal); + + sprintf(test_c, test_c_tmp, fattr); + sprintf(path, "cc/func_attr/%s/presents", fattr); + + report("Checking for function attribute %s... ", fattr); + logprintf(logdepth, "test_fattr: trying to find %s...\n", fattr); + logdepth++; + if (try(logdepth, NULL, test_c, "OK")) { + put(path, strue); + report("Found.\n"); + return 0; + } + put(path, sfalse); + report("Not found.\n"); + return 1; +} + +int find_fattr_unused(const char *name, int logdepth, int fatal) +{ + return test_fattr(name, logdepth, fatal, "unused"); +} + +static int test_declspec(const char *name, int logdepth, int fatal, const char *dspec) +{ + char path[64]; + char test_c[256]; + const char *test_c_tmp = + NL "#include " + NL "void __declspec (%s) test1();" + NL "void test1()" + NL "{" + NL " puts(\"OK\");" + NL "}" + NL "int main() {" + NL " test1();" + NL " return 0;" + NL "}" + NL ; + + require("cc/cc", logdepth, fatal); + + sprintf(test_c, test_c_tmp, dspec); + sprintf(path, "cc/declspec/%s/presents", dspec); + + report("Checking for declspec %s... 
", dspec); + logprintf(logdepth, "test_declspec: trying to find %s...\n", dspec); + logdepth++; + if (try(logdepth, NULL, test_c, "OK")) { + put(path, strue); + report("Found.\n"); + return 0; + } + put(path, sfalse); + report("Not found.\n"); + return 1; +} + +int find_declspec_dllimport(const char *name, int logdepth, int fatal) +{ + return test_declspec(name, logdepth, fatal, "dllimport"); +} + +int find_declspec_dllexport(const char *name, int logdepth, int fatal) +{ + return test_declspec(name, logdepth, fatal, "dllexport"); +} + +static int test_dll_auxfile(const char *name, int logdepth, int fatal, const char *path, const char *ldflag, const char *filename) +{ + char *ldflags; + char test_c[256]; + const char *test_c_template = + NL "#include " + NL "void %s test1();" + NL "void test1()" + NL "{" + NL " puts(\"OK\");" + NL "}" + NL "int main() {" + NL " test1();" + NL " return 0;" + NL "}" + NL ; + const char *dspec; + + require("cc/cc", logdepth, fatal); + require("cc/declspec/dllexport/*", logdepth, 0); + + if (istrue("cc/declspec/dllexport/presents")) + dspec = " __declspec(dllexport) "; + else + dspec = ""; + + sprintf(test_c, test_c_template, dspec); + + report("Checking for DLL flag %s... 
", ldflag); + logprintf(logdepth, "test_dll_auxfile: trying to find %s...\n", ldflag); + logdepth++; + ldflags = str_concat("", ldflag, ",", filename, " ", get("cc/ldflags"), NULL); + if (try_flags(logdepth, NULL, test_c, NULL, ldflags, "OK")) { + unlink(filename); + put(path, ldflag); + free(ldflags); + report("Found.\n"); + return 0; + } + unlink(filename); + free(ldflags); + report("Not found.\n"); + return 1; +} + +int find_cc_wloutimplib(const char *name, int logdepth, int fatal) +{ + return test_dll_auxfile(name, logdepth, fatal, "cc/wloutimplib", "-Wl,--out-implib", "libscconfig_0.a"); +} + +int find_cc_wloutputdef(const char *name, int logdepth, int fatal) +{ + return test_dll_auxfile(name, logdepth, fatal, "cc/wloutputdef", "-Wl,--output-def", "libscconfig_0.def"); +} + +/* Hello world program to test compiler flags */ +static const char *test_hello_world = + NL "#include " + NL "int main() {" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL ; + +static int try_hello(int logdepth, const char *cflags, const char *ldflags, const char *name, const char *value) +{ + if (try_flags(logdepth, NULL, test_hello_world, cflags, ldflags, "OK")) { + put(name, value); + report("OK (%s)\n", value); + return 1; + } + return 0; +} + +int find_rdynamic(const char *name, int logdepth, int fatal) +{ + const char *node = "cc/rdynamic"; + + require("cc/cc", logdepth, fatal); + + report("Checking for rdynamic... "); + logprintf(logdepth, "find_rdynamic: trying to find rdynamic...\n"); + logdepth++; + + if (try_hello(logdepth, NULL, "-rdynamic", node, "-rdynamic")) return 0; + if (try_hello(logdepth, NULL, "-Wl,-export-dynamic", node, "-Wl,-export-dynamic")) return 0; + if (try_hello(logdepth, NULL, NULL, node, "")) return 0; + + report("Not found.\n"); + return 1; +} + +int find_cc_fpie(const char *name, int logdepth, int fatal) +{ + const char *test_c = test_hello_world; + + require("cc/cc", logdepth, fatal); + /* TODO: what about -fpic? 
*/ + + report("Checking for -fpie... "); + logprintf(logdepth, "find_cc_fpie: trying to find -fpie...\n"); + logdepth++; + + /* NOTE: some gcc configuration might not pass the -pie flag to the linker, so */ + /* try to detect whether we can force it to the linker */ + if (try_icl(logdepth, "cc/fpie", test_c, NULL, "-fpie", "-pie -Wl,-pie")) return 0; + if (try_icl(logdepth, "cc/fpie", test_c, NULL, "-fPIE", "-pie -Wl,-pie")) return 0; + if (try_icl(logdepth, "cc/fpie", test_c, NULL, "-fpie", "-pie")) return 0; + if (try_icl(logdepth, "cc/fpie", test_c, NULL, "-fPIE", "-pie")) return 0; + if (try_icl(logdepth, "cc/fpie", test_c, NULL, NULL, NULL)) return 0; + return try_fail(logdepth, "cc/fpie"); +} + +int find_cc_fnopie(const char *name, int logdepth, int fatal) +{ + const char *test_c = test_hello_world; + + require("cc/cc", logdepth, fatal); + + report("Checking for -fno-pie... "); + logprintf(logdepth, "find_cc_fnopie: trying to find -fno-pie...\n"); + logdepth++; + + if (try_icl(logdepth, "cc/fnopie", test_c, NULL, "-fno-pie", NULL)) return 0; + if (try_icl(logdepth, "cc/fnopie", test_c, NULL, "-fno-pie", "-static")) return 0; + if (try_icl(logdepth, "cc/fnopie", test_c, NULL, NULL, NULL)) return 0; + return try_fail(logdepth, "cc/fnopie"); +} + +int find_cc_fnopic(const char *name, int logdepth, int fatal) +{ + const char *test_c = test_hello_world; + + require("cc/cc", logdepth, fatal); + + report("Checking for -fno-pic... "); + logprintf(logdepth, "find_cc_fnopic: trying to find -fno-pic...\n"); + logdepth++; + + if (try_icl(logdepth, "cc/fnopic", test_c, NULL, "-fno-pic", NULL)) return 0; + if (try_icl(logdepth, "cc/fnopic", test_c, NULL, "-fno-pic", "-static")) return 0; + if (try_icl(logdepth, "cc/fnopic", test_c, NULL, NULL, NULL)) return 0; + return try_fail(logdepth, "cc/fnopic"); +} + +int find_soname(const char *name, int logdepth, int fatal) +{ + + require("cc/cc", logdepth, fatal); + + report("Checking for soname... 
"); + logprintf(logdepth, "find_soname: trying to find soname...\n"); + logdepth++; + + if (try_hello(logdepth, NULL, "-Wl,-soname,libscconfig.0", "cc/soname", "-Wl,-soname,")) return 0; + if (try_hello(logdepth, NULL, NULL, "cc/soname", "")) return 0; + + report("Not found.\n"); + return 1; +} + + +int find_wlrpath(const char *name, int logdepth, int fatal) +{ + + require("cc/cc", logdepth, fatal); + + report("Checking for rpath... "); + logprintf(logdepth, "find_wlrpath: trying to find rpath...\n"); + logdepth++; + + if (try_hello(logdepth, NULL, "-Wl,-rpath=.", "cc/wlrpath", "-Wl,-rpath=")) return 0; + + report("Not found.\n"); + return 1; +} + +int find_fpic(const char *name, int logdepth, int fatal) +{ + + require("cc/cc", logdepth, fatal); + + report("Checking for -fpic... "); + logprintf(logdepth, "find_fpic: trying to find -fpic...\n"); + logdepth++; + + if (try_hello(logdepth, NULL, "-fPIC", "cc/fpic", "-fPIC")) return 0; + if (try_hello(logdepth, NULL, "-fpic", "cc/fpic", "-fpic")) return 0; + if (try_hello(logdepth, NULL, NULL, "cc/fpic", "")) return 0; + + report("Not found.\n"); + return 1; +} + +/* Hello world lib... 
*/
+static const char *test_lib =
+	NL "#include "
+	NL "int hello() {"
+	NL " puts(\"OK\");"
+	NL " return 0;"
+	NL "}"
+	NL ;
+
+/* ...and the corresponding host application; this is an sprintf() template: the first %s is the header to #include, the second %s is the library passed to dlopen(), so a literal % has to be written as %% */
+static const char *test_host =
+	NL "#include "
+	NL "#include "
+	NL "#include %s"
+	NL "int main() {"
+	NL " void *handle = NULL;"
+	NL " void (*func)() = NULL;"
+	NL " char *error;"
+	NL
+	NL " handle = dlopen(\"%s\", RTLD_NOW);"
+	NL " if (handle == NULL) {"
+	NL " printf(\"dlopen fails: %%s\\n\", dlerror());" /* %%s becomes %s after sprintf(), so the dlerror() text is really printed */
+	NL " return 1;"
+	NL " }"
+	NL " func = dlsym(handle, \"hello\");"
+	NL " if (func == NULL) {"
+	NL " printf(\"dlsym fails: %%s\\n\", dlerror());" /* same as above: the conversion was missing */
+	NL " return 1;"
+	NL " }"
+	NL " func();"
+	NL " return 0;"
+	NL "}"
+	NL ;
+
+static int try_dynlib(int logdepth, const char *cflags, char *concated_ldflags, const char *name, const char *value, const char *host_app_cflags, const char *host_app_ldflags)
+{
+	char test_host_app[1024];
+	const char *fpic;
+	const char *ld_include;
+	const char *dlc;
+	char *libname, *libname_dyn;
+	char *cflags_c;
+	char *oname = ".o";
+	int ret = 0;
+
+	/* the header included by the host app depends on whether libs/dl-compat is true */
+	dlc = get("libs/dl-compat");
+	if ((dlc != NULL) && (strcmp(dlc, strue) == 0))
+		ld_include = "";
+	else
+		ld_include = "";
+
+	fpic = get("cc/fpic");
+	if (fpic == NULL) fpic = "";
+
+	if (cflags == NULL)
+		cflags="";
+
+	cflags_c = malloc(strlen(cflags) + 8 + strlen(fpic));
+	sprintf(cflags_c, "%s -c %s", cflags, fpic);
+
+	/* build test_lib into a dynamic library first; only if that works, generate and run the host app */
+	libname_dyn = libname = (char *)get("sys/ext_dynlib");
+	if ((compile_code(logdepth, test_lib, &oname, NULL, cflags_c, NULL) != 0) ||
+	    (compile_file(logdepth, oname, &libname_dyn, NULL, NULL, concated_ldflags) != 0)) {
+		report("FAILED (compiling dynlib)\n");
+	}
+	else {
+		sprintf(test_host_app, test_host, ld_include, libname_dyn);
+		if (try_flags(logdepth, NULL, test_host_app, host_app_cflags, host_app_ldflags, "OK")) {
+			put(name, value);
+			report("OK (%s)\n", value);
+			ret = 1;
+		}
+	}
+	unlink(libname_dyn);
+	unlink(oname);
+	if (libname != libname_dyn)
+		free(libname_dyn);
+	
free(oname); + free(concated_ldflags); + free(cflags_c); + return ret; +} + + +int find_ldflags_dynlib(const char *name, int logdepth, int fatal) +{ + + require("cc/cc", logdepth, fatal); + require("cc/rdynamic", logdepth, fatal); + require("cc/fpic", logdepth, fatal); + require("libs/ldl", logdepth, fatal); + + report("Checking for dynamic library ldflags... "); + logprintf(logdepth, "find_ldflags_dynlib: trying to find dynamic library ldflags...\n"); + logdepth++; + + if (try_dynlib(logdepth, NULL, concat_nodes("-dynamic -shared", "cc/rdynamic", "libs/ldl", NULL), "cc/ldflags_dynlib", "-dynamic -shared", NULL, get("libs/ldl"))) return 0; + if (try_dynlib(logdepth, NULL, concat_nodes("-shared", "cc/rdynamic", "libs/ldl", NULL), "cc/ldflags_dynlib", "-shared", NULL, get("libs/ldl"))) return 0; + report("Not found.\n"); + return 1; +} + +static int try_dll_or_so(int logdepth, int is_dll, const char *lib_ldflags, const char *name, const char *value, + const char *dspec_dllexport, const char *dspec_dllimport, + const char *app_cflags, const char *app_ldflags) +{ + static const char *test_lib_template = + NL "#include " + NL "%s void hello();" + NL "void hello() {" + NL " puts(\"OK\");" + NL "}" + NL ; + static const char *test_app_template = + NL "%s void hello();" + NL "int main() {" + NL " hello();" + NL " return 0;" + NL "}" + NL ; + char test_lib[1024]; + char test_app[1024]; + const char *fpic; + char *cflags_c; + char *oname, *oname_ext; + char *libname, *libname_ext; + char *appname = NULL, *appname_ext = NULL; + char *lib_filename = NULL, *lib_dirname = NULL; + char *lib_ldflags_new = NULL; + char *app_ldflags_new = NULL; + size_t len, ii; + int ret = 0; + + ++logdepth; + + require("cc/cc", logdepth, 0); + require("cc/cflags", logdepth, 0); + require("cc/ldflags", logdepth, 0); + require("cc/fpic", logdepth, 0); + require("sys/ext_exe", logdepth, 0); + require("sys/ext_dynlib_native", logdepth, 0); + + fpic = get("cc/fpic"); + if (fpic == NULL) fpic = ""; + + 
if (app_cflags == NULL) + app_cflags = ""; + + if (app_ldflags == NULL) + app_ldflags = ""; + + cflags_c = str_concat(" ", get("cc/cflags"), "-c", fpic, NULL); + + oname = oname_ext = ".o"; + libname = libname_ext = (char *)get("sys/ext_dynlib_native"); + sprintf(test_lib, test_lib_template, dspec_dllexport); + lib_ldflags_new = str_concat(" ", get("cc/ldflags"), lib_ldflags, NULL); + if ((compile_code(logdepth, test_lib, &oname, NULL, cflags_c, NULL) != 0) || + (compile_file(logdepth, oname, &libname, NULL, NULL, lib_ldflags_new) != 0)) { + report("FAILED (compiling %s)\n", (is_dll?"DLL":"SO")); + } + else { + lib_filename = file_name(libname); + lib_dirname = dir_name(libname); + + if (!is_dll) { + len = strlen(lib_filename) - strlen(libname_ext); + for (ii=3; ii" + NL "int main() {" + NL " char *s;" + NL " s = alloca(128);" + NL " if (s != NULL)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL ; + +static int try_alloca(int logdepth, const char *cflags, const char *ldflags, const char *name, const char *value) +{ + if (try_flags(logdepth, NULL, test_alloca, cflags, ldflags, "OK")) { + put(name, value); + report("OK (%s)\n", value); + return 1; + } + return 0; +} + +int find_alloca(const char *name, int logdepth, int fatal) +{ + require("cc/cc", logdepth, fatal); + + report("Checking for alloca()... "); + logprintf(logdepth, "find_alloca: trying to find alloca()...\n"); + logdepth++; + + if (try_alloca(logdepth, NULL, NULL, "cc/alloca/presents", "true")) return 0; + + put("cc/alloca/presents", "false"); + report("Not found.\n"); + return 1; +} + + +int find__exit(const char *name, int logdepth, int fatal) +{ + const char *test_c = + NL "#include " + NL "int main() {" + NL " _exit(0);" + NL " puts(\"BAD\");" + NL " return 0;" + NL "}" + NL ; + + require("cc/cc", logdepth, fatal); + + report("Checking for _exit()... 
"); + logprintf(logdepth, "find__exit: trying to find _exit()...\n"); + logdepth++; + + if (try_flags_inv(logdepth, NULL, test_c, NULL, NULL, "BAD")) { + put("cc/_exit/presents", strue); + report("found\n"); + return 0; + } + + put("cc/_exit/presents", sfalse); + report("Not found.\n"); + return 1; +} + diff --git a/scconfig/src/default/find_environ.c b/scconfig/src/default/find_environ.c new file mode 100644 index 0000000..4004e4d --- /dev/null +++ b/scconfig/src/default/find_environ.c @@ -0,0 +1,166 @@ +/* + scconfig - detection of environmental variable access features + Copyright (C) 2014 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +int find_main_arg3(const char *name, int logdepth, int fatal) +{ + char *out; + char *test_c = + NL "#include " + NL "#include " + NL "int main(int argc, char *argv[], char *env[])" + NL "{" + NL " char **e;" + NL " int cnt;" + NL " for(e = env, cnt = 0; *e != NULL; e++, cnt++) ;" + NL " printf(\"%d\\n\", cnt);" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for main() with 3 arguments... 
"); + logprintf(logdepth, "find_main_3args: checking for main() with 3 arguments\n"); + if (compile_run(logdepth+1, test_c, NULL, NULL, NULL, &out) == 0) { + if (atoi(out) > 1) { + put("libs/env/main_3arg", strue); + report("OK\n"); + free(out); + return 0; + } + free(out); + report("not found (broken output).\n"); + } + else { + report("not found (no output).\n"); + } + put("libs/env/main_3arg", sfalse); + return 1; +} + +int find_environ(const char *name, int logdepth, int fatal) +{ + char *out; + char *test_c = + NL "#include " + NL "#include " + NL "extern char **environ;" + NL "int main(int argc, char *argv[])" + NL "{" + NL " char **e;" + NL " int cnt;" + NL " for(e = environ, cnt = 0; *e != NULL; e++, cnt++) ;" + NL " printf(\"%d\\n\", cnt);" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for extern environ... "); + logprintf(logdepth, "find_environ: checking for extern environ\n"); + if (compile_run(logdepth+1, test_c, NULL, NULL, NULL, &out) == 0) { + if (atoi(out) > 1) { + put("libs/env/environ", strue); + report("OK\n"); + free(out); + return 0; + } + free(out); + report("not found (broken output).\n"); + } + else { + report("not found (no output).\n"); + } + put("libs/env/environ", sfalse); + return 1; +} + +int find_putenv(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "#include " + NL "int main(int argc, char *argv[])" + NL "{" + NL " putenv(\"SCCONFIG_TEST=bad\");" + NL " putenv(\"SCCONFIG_TEST=OK\");" + NL " printf(\"%s\\n\", getenv(\"SCCONFIG_TEST\"));" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for putenv()... 
"); + logprintf(logdepth, "find_putenv: trying to find putenv...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/env/putenv", test_c, "", NULL, NULL)) + return 0; + if (try_icl(logdepth, "libs/env/putenv", test_c, "#define _XOPEN_SOURCE", NULL, NULL)) + return 0; + if (try_icl(logdepth, "libs/env/putenv", test_c, "#define _SVID_SOURCE", NULL, NULL)) + return 0; + return try_fail(logdepth, "libs/env/putenv"); +} + + +int find_setenv(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "#include " + NL "int main(int argc, char *argv[])" + NL "{" + NL " setenv(\"SCCONFIG_TEST\", \"bad\", 1);" + NL " setenv(\"SCCONFIG_TEST\", \"OK\", 1);" + NL " printf(\"%s\\n\", getenv(\"SCCONFIG_TEST\"));" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for setenv()... "); + logprintf(logdepth, "find_setenv: trying to find setenv...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/env/setenv", test_c, "", NULL, NULL)) + return 0; + if (try_icl(logdepth, "libs/env/setenv", test_c, "#define _BSD_SOURCE", NULL, NULL)) + return 0; + if (try_icl(logdepth, "libs/env/setenv", test_c, "#define _POSIX_C_SOURCE 200112L", NULL, NULL)) + return 0; + if (try_icl(logdepth, "libs/env/setenv", test_c, "#define _XOPEN_SOURCE 600", NULL, NULL)) + return 0; + return try_fail(logdepth, "libs/env/setenv"); +} + diff --git a/scconfig/src/default/find_fscalls.c b/scconfig/src/default/find_fscalls.c new file mode 100644 index 0000000..4aa2320 --- /dev/null +++ b/scconfig/src/default/find_fscalls.c @@ -0,0 +1,806 @@ +/* + scconfig - detection of standard library features: file system specific calls + Copyright (C) 2010 Tibor Palinkas + Copyright (C) 2018 Aron Barath + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +int find_fs_realpath(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "#include " + NL "#include " + NL "#ifdef PATH_MAX" + NL "char out_buf[PATH_MAX];" + NL "#else" + NL "char out_buf[32768];" + NL "#endif" + NL "int main() {" + NL " if (realpath(\".\", out_buf) == out_buf)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for realpath()... "); + logprintf(logdepth, "find_fs_realpath: trying to find realpath...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/fs/realpath", test_c, NULL, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/fs/realpath", test_c, "#define _DEFAULT_SOURCE", NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/fs/realpath", test_c, "#define _BSD_SOURCE", NULL, NULL)) return 0; + return try_fail(logdepth, "libs/fs/realpath"); +} + + +int find_fs__fullpath(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "#include " + NL "#include " + NL "#include " + NL "int main() {" + NL " char full[_MAX_PATH];" + NL " if (_fullpath(full, \".\", _MAX_PATH) != NULL)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for _fullpath()... 
"); + logprintf(logdepth, "find_fs__fullpath: trying to find _fullpath...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/fs/_fullpath", test_c, NULL, NULL, NULL)) return 0; + return try_fail(logdepth, "libs/fs/_fullpath"); +} + + +int find_fs_readdir(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "#include " + NL "int main() {" + NL " DIR *dirp;" + NL " struct dirent *dp;" + NL " int found = 0;" + NL " if ((dirp = opendir(\".\")) == 0)" + NL " return -1;" + NL " while ((dp = readdir(dirp)) != 0)" + NL " if (strcmp(dp->d_name, \"configure\") == 0)" + NL " found++;" + NL " closedir(dirp);" + NL " if (found == 1)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}"; + char *includes[] = { + "#include ", + "#include ", /* 4.2BSD */ + NULL + }; + char **i; + + require("cc/cc", logdepth, fatal); + + report("Checking for readdir()... "); + logprintf(logdepth, "find_fs_readdir: trying to find readdir...\n"); + logdepth++; + + for (i = includes; *i != NULL; i++) + if (try_icl(logdepth, "libs/fs/readdir", test_c, *i, NULL, NULL)) + return 0; + return try_fail(logdepth, "libs/fs/readdir"); +} + + +int find_fs_findnextfile(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "#include " + NL "#include " + NL "int main(int argc, char *argv[]) {" + NL " WIN32_FIND_DATA fd;" + NL " HANDLE h;" + NL " int found=0;" + NL " h = FindFirstFile(argv[0], &fd);" + NL " if (h == INVALID_HANDLE_VALUE)" + NL " return -1;" + NL " while (FindNextFile(h, &fd) != 0);" + NL " found++;" + NL " FindClose(h);" + NL " if (found > 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}"; + + require("cc/cc", logdepth, fatal); + + report("Checking for FindNextFile()... 
"); + logprintf(logdepth, "find_fs_findnextfile: trying to find FindNextFile...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/fs/findnextfile", test_c, NULL, NULL, NULL)) return 0; + return try_fail(logdepth, "libs/fs/findnextfile"); +} + +int find_fs_access(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "int my_test() { return access(\".\", 0); }" + NL "#include " + NL "int main() {" + NL " if (my_test() == 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + const char* includes[] = { "#include ", "#include \n#include ", "#include ", NULL }; + const char** inc; + + require("cc/cc", logdepth, fatal); + + report("Checking for access()... "); + logprintf(logdepth, "find_fs_access: trying to find access...\n"); + logdepth++; + + for (inc=includes; *inc; ++inc) + if (try_icl(logdepth, "libs/fs/access", test_c, *inc, NULL, NULL)) return 0; + + return try_fail(logdepth, "libs/fs/access"); +} + +int find_fs_access_macros(const char *rname, int logdepth, int fatal) +{ + char *test_c_templ = + NL "%s" + NL "void my_test() { int a = %s; }" + NL "#include " + NL "int main() {" + NL " my_test();" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + char test_c[256]; + + char *names[][3] = { + {"F_OK", "F_OK", NULL}, + {"R_OK", "R_OK", NULL}, + {"W_OK", "W_OK", NULL}, + {"X_OK", "X_OK", NULL}, + {NULL, NULL, NULL} + }; + char **n; + const char* access_includes; + int name, pr; + char nodename[128]; + + require("cc/cc", logdepth, fatal); + if (require("libs/fs/access/*", logdepth, fatal)!=0 || + !istrue(get("libs/fs/access/presents"))) { + put("libs/fs/access/macros/presents", sfalse); + return 1; + } + access_includes = get("libs/fs/access/includes"); + + report("Checking for access macros:\n"); + logprintf(logdepth, "find_fs_access_macros: trying to find access macros...\n"); + logdepth++; + + pr = 0; + for(name = 0; *names[name] != NULL; name++) { + report(" %s...\t", names[name][0]); + for(n = &names[name][0]; *n != NULL; n++) { + 
sprintf(test_c, test_c_templ, access_includes, *n); + if (try_icl(logdepth, NULL, test_c, NULL, NULL, NULL)) { + sprintf(nodename, "libs/fs/access/macros/%s", names[name][0]); + put(nodename, *n); + report("found as %s\n", *n); + pr++; + goto found; + } + } + report("not found\n"); + found:; + } + + put("libs/fs/access/macros/presents", ((pr > 0) ? (strue) : (sfalse))); + return (pr == 0); +} + +int find_fs_stat_macros(const char *rname, int logdepth, int fatal) +{ + char *test_c_templ = + NL "#include " + NL "#include " + NL "void my_test() { int a = %s(0); }" + NL "#include " + NL "int main() {" + NL " my_test();" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + char test_c[256]; + + char *names[][3] = { + {"S_ISREG", "S_IFREG", NULL}, + {"S_ISDIR", "S_IFDIR", NULL}, + {"S_ISCHR", "S_IFCHR", NULL}, + {"S_ISBLK", "S_IFBLK", NULL}, + {"S_ISFIFO", "S_IFFIFO", NULL}, + {"S_ISLNK", "S_IFLNK", NULL}, + {"S_ISCHR", "S_IFCHR", NULL}, + {"S_ISSOCK", "S_IFSOCK", NULL}, + {NULL, NULL, NULL} + }; + char **n; + int name, pr; + char nodename[128]; + + require("cc/cc", logdepth, fatal); + + report("Checking for stat macros:\n"); + logprintf(logdepth, "find_fs_stat_macros: trying to find stat macros...\n"); + logdepth++; + + pr = 0; + for(name = 0; *names[name] != NULL; name++) { + report(" %s...\t", names[name][0]); + for(n = &names[name][0]; *n != NULL; n++) { + sprintf(test_c, test_c_templ, *n); + if (try_icl(logdepth, NULL, test_c, NULL, NULL, NULL)) { + sprintf(nodename, "libs/fs/stat/macros/%s", names[name][0]); + put(nodename, *n); + report("found as %s\n", *n); + pr++; + goto found; + } + } + report("not found\n"); + found:; + } + + put("libs/fs/stat/macros/presents", ((pr > 0) ? 
(strue) : (sfalse))); + return (pr == 0); +} + +int find_fs_stat_fields(const char *rname, int logdepth, int fatal) +{ + char *test_c_templ = + NL "#include " + NL "#include " + NL "#include " + NL "int main() {" + NL " struct stat st;" + NL " (void)st.%s;" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + char test_c[256]; + + char *names[] = {"st_blksize", "st_blocks", "st_rdev", "st_mtim", "st_mtime", "st_birthtim", "st_birthtime", NULL }; + int name, pr; + char nodename[128]; + + require("cc/cc", logdepth, fatal); + + report("Checking for stat macros:\n"); + logprintf(logdepth, "find_fs_stat_fields: trying to find stat macros...\n"); + logdepth++; + + pr = 0; + for(name = 0; names[name] != NULL; name++) { + report(" %s...\t", names[name]); + sprintf(test_c, test_c_templ, names[name]); + sprintf(nodename, "libs/fs/stat/fields/%s/presents", names[name]); + if (try_icl(logdepth, NULL, test_c, NULL, NULL, NULL)) { + put(nodename, strue); + report("found\n"); + pr++; + } + else { + report("not found\n"); + put(nodename, sfalse); + } + } + return (pr == 0); +} + +static int find_fs_any_lstat(const char *name, int logdepth, int fatal, char *fn) +{ + /* make sure does not affect our lstat() detection */ + const char *test_c_in = + NL "void my_puts(const char *s);" + NL "int main() {" + NL " struct stat buf;" + NL " if (%s(\".\", &buf) == 0)" + NL " my_puts(\"OK\");" + NL " return 0;" + NL "}" + NL "#include " + NL "void my_puts(const char *s)" + NL "{" + NL " puts(s);" + NL "}" + NL; + char test_c[384], node[64]; + const char *incs[] = {"#include ", "#include ", "#include \n#include \n#include ", NULL}; + const char **inc; + + require("cc/cc", logdepth, fatal); + + sprintf(node, "libs/fs/%s", fn); + sprintf(test_c, test_c_in, fn); + + report("Checking for %s... 
", fn); + logprintf(logdepth, "find_fs_%s: trying to find lstat()...\n", fn); + logdepth++; + + for (inc = incs; *inc; ++inc) { + if (try_icl(logdepth, node, test_c, *inc, NULL, NULL)) + return 0; + } + + return try_fail(logdepth, node); +} + +int find_fs_lstat(const char *name, int logdepth, int fatal) +{ + return find_fs_any_lstat(name, logdepth, fatal, "lstat"); +} + +int find_fs_statlstat(const char *name, int logdepth, int fatal) +{ + return find_fs_any_lstat(name, logdepth, fatal, "statlstat"); +} + + +int find_fs_getcwd(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "int main() {" + NL " char b[1024];" + NL " if (getcwd(b, sizeof(b)) != NULL)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for getcwd... "); + logprintf(logdepth, "find_fs_getcwd: trying to find getcwd()...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/fs/getcwd", test_c, NULL, NULL, NULL)) return 0; + return try_fail(logdepth, "libs/fs/getcwd"); +} + +int find_fs__getcwd(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "int main() {" + NL " char b[1024];" + NL " if (_getcwd(b, sizeof(b)) != NULL)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for _getcwd... "); + logprintf(logdepth, "find_fs__getcwd: trying to find _getcwd()...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/fs/_getcwd", test_c, "#include ", NULL, NULL)) return 0; + return try_fail(logdepth, "libs/fs/_getcwd"); +} + + +int find_fs_getwd(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "int main() {" + NL " char b[8192];" + NL " if (getwd(b) != NULL)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for getwd... 
"); + logprintf(logdepth, "find_fs_getwd: trying to find getwd()...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/fs/getwd", test_c, NULL, NULL, NULL)) return 0; + return try_fail(logdepth, "libs/fs/getwd"); +} + +int find_fs_mkdir(const char *name, int logdepth, int fatal) +{ + char *dir; + char test_c[1024]; + char *test_c_in = + NL "#include " + NL "int main() {" + NL no_implicit(int, "mkdir", "mkdir") + NL " if (mkdir(\"%s\"%s) == 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + dir = tempfile_new(""); + unlink(dir); + + report("Checking for mkdir... "); + logprintf(logdepth, "find_fs_mkdir: trying to find mkdir()...\n"); + logdepth++; + + /* POSIX, 2 arguments, standard includes */ + sprintf(test_c, test_c_in, dir, ", 0755"); + if (try_icl(logdepth, "libs/fs/mkdir", test_c, "#include \n#include \n", NULL, NULL)) { + if (!is_dir(dir)) + goto oops1; + put("libs/fs/mkdir/num_args", "2"); + rmdir(dir); + return 0; + } + + /* POSIX, 2 arguments, no includes */ + oops1:; + sprintf(test_c, test_c_in, dir, ", 0755"); + if (try_icl(logdepth, "libs/fs/mkdir", test_c, NULL, NULL, NULL)) { + if (!is_dir(dir)) + goto oops2; + put("libs/fs/mkdir/num_args", "2"); + rmdir(dir); + return 0; + } + + /* win32, 1 argument, with */ + oops2:; + sprintf(test_c, test_c_in, dir, ""); + if (try_icl(logdepth, "libs/fs/mkdir", test_c, "#include \n", NULL, NULL)) { + if (!is_dir(dir)) + goto oops3; + put("libs/fs/mkdir/num_args", "1"); + rmdir(dir); + return 0; + } + + oops3:; + put("libs/fs/mkdir/includes", ""); + put("libs/fs/mkdir/ldflags", ""); + put("libs/fs/mkdir/cdflags", ""); + + rmdir(dir); + return try_fail(logdepth, "libs/fs/mkdir"); +} + +int find_fs__mkdir(const char *name, int logdepth, int fatal) +{ + char *dir; + char test_c[1024]; + char *test_c_in = + NL "#include " + NL "int main() {" + NL " if (_mkdir(\"%s\"%s) == 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + 
+ dir = tempfile_new(""); + unlink(dir); + + report("Checking for _mkdir... "); + logprintf(logdepth, "find_fs__mkdir: trying to find _mkdir()...\n"); + logdepth++; + + /* win32, 2 arguments, standard includes */ + sprintf(test_c, test_c_in, dir, ", 0755"); + if (try_icl(logdepth, "libs/fs/_mkdir", test_c, "#include \n", NULL, NULL)) { + if (!is_dir(dir)) + goto oops1; + put("libs/fs/_mkdir/num_args", "2"); + rmdir(dir); + return 0; + } + + oops1:; + /* win32, 1 argument, standard includes */ + sprintf(test_c, test_c_in, dir, ""); + if (try_icl(logdepth, "libs/fs/_mkdir", test_c, "#include \n", NULL, NULL)) { + if (!is_dir(dir)) + goto oops2; + put("libs/fs/_mkdir/num_args", "1"); + rmdir(dir); + return 0; + } + + oops2:; + put("libs/fs/_mkdir/includes", ""); + put("libs/fs/_mkdir/ldflags", ""); + put("libs/fs/_mkdir/cdflags", ""); + + rmdir(dir); + return try_fail(logdepth, "libs/fs/_mkdir"); +} + +int find_fs_mkdtemp(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "#include " + NL "#include " + NL "int main() {" + NL " char fn[32], *o;" + NL " strcpy(fn, \"scc.XXXXXX\");" + NL " o = mkdtemp(fn);" + NL " if ((o != NULL) && (strstr(o, \"scc.\") != NULL)) {" + NL " remove(o);" + NL " puts(\"OK\");" + NL " }" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for mkdtemp... 
"); + logprintf(logdepth, "find_fs_mkdtemp: trying to find mkdtemp()...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/fs/mkdtemp", test_c, "#include \n", NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/fs/mkdtemp", test_c, "#define _BSD_SOURCE\n#include \n", NULL, NULL)) return 0; + return try_fail(logdepth, "libs/fs/mkdtemp"); +} + +int find_fs_mmap(const char *name, int logdepth, int fatal) +{ + char test_c[1024]; + char *tmp; + FILE *f; + char *test_c_in = + NL "#include " + NL "#include " + NL "#include " + NL "#include " + NL "#include " + NL "#include " + NL "int main() {" + NL " int fd, size = 11;" + NL " void *p;" + NL " fd = open(\"%s\", O_RDONLY);" + NL " p = mmap(0, size, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0);" + NL " if (p == NULL) {" + NL " puts(\"mmap fail\");" + NL " return 0;" + NL " }" + NL " if (strcmp(p, \"hello world\") != 0) {" + NL " puts(\"strcmp fail\");" + NL " return 0;" + NL " }" + NL " if (munmap(p, size) != 0) {" + NL " puts(\"munmap fail\");" + NL " return 0;" + NL " }" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + tmp = tempfile_new(""); + f = fopen(tmp, "w"); + fprintf(f, "hello world"); + fclose(f); + sprintf(test_c, test_c_in, tmp); + + report("Checking for mmap... "); + logprintf(logdepth, "find_fs_mmap: trying to find mmap()...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/fs/mmap", test_c, "#include \n", NULL, NULL)) { + unlink(tmp); + free(tmp); + return 0; + } + + unlink(tmp); + free(tmp); + return try_fail(logdepth, "libs/fs/mmap"); +} + +/* Haiku/BeOS next_dev */ +int find_fsmount_next_dev(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "int main()" + NL "{" + NL " int32 pos = 0;" + NL " dev_t res = next_dev(&pos);" + NL " if (res >= 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for next_dev... 
"); + logprintf(logdepth, "find_fsmount_next_dev: trying to find next_dev()...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/fsmount/next_dev", test_c, "#include \n", NULL, NULL)) return 0; + return try_fail(logdepth, "libs/fsmount/next_dev"); +} + +int find_fsmount_fsstat_fields(const char *name, int logdepth, int fatal) +{ + const char *fields[] = {"f_fstypename", NULL}; + return try_icl_sfields(logdepth, "libs/fsmount/struct_fsstat", "struct fsstat", fields, "#include ", NULL, NULL, 0); +} + +int find_fsmount_statfs_fields(const char *name, int logdepth, int fatal) +{ + const char *fields[] = {"f_fstypename", "f_type", NULL}; + return try_icl_sfields(logdepth, "libs/fsmount/struct_statfs", "struct statfs", fields, "#include ", NULL, NULL, 0); +} + +int find_fsmount_statvfs_fields(const char *name, int logdepth, int fatal) +{ + const char *fields[] = {"f_fstypename", "f_type", "f_basetype", NULL}; + return try_icl_sfields(logdepth, "libs/fsmount/struct_statvfs", "struct statvfs", fields, "#include ", NULL, NULL, 0); +} + +int find_fs_ustat(const char *name, int logdepth, int fatal) +{ + const char *key = "libs/fs/ustat"; + const char *test_c = + NL "#include " + NL "#include " + NL "int main()" + NL "{" + NL " struct stat stat_buf;" + NL " struct ustat ustat_buf;" + NL " if (stat(\".\", &stat_buf) == 0 &&" + NL " ustat(stat_buf.st_dev, &ustat_buf) == 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for ustat... 
"); + logprintf(logdepth, "find_fs_ustat: trying to find ustat()...\n"); + logdepth++; + + if (try_icl(logdepth, key, test_c, "#include ", NULL, NULL)) return 0; + if (try_icl(logdepth, key, test_c, "#include ", NULL, NULL)) return 0; + if (try_icl(logdepth, key, test_c, "#include \n#include ", NULL, NULL)) return 0; + if (try_icl(logdepth, key, test_c, "#include \n#include \n#include ", NULL, NULL)) return 0; + return try_fail(logdepth, key); +} + +int find_fs_statfs(const char *name, int logdepth, int fatal) +{ + const char *key = "libs/fs/statfs"; + const char *test_c = + NL "#include " + NL "int main()" + NL "{" + NL " struct statfs statfs_buf;" + NL " if (statfs(\".\", &statfs_buf) == 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for statfs... "); + logprintf(logdepth, "find_fs_statfs: trying to find statfs()...\n"); + logdepth++; + + if (try_icl(logdepth, key, test_c, "#include ", NULL, NULL)) return 0; + if (try_icl(logdepth, key, test_c, "#include ", NULL, NULL)) return 0; + return try_fail(logdepth, key); +} + +int find_fs_statvfs(const char *name, int logdepth, int fatal) +{ + const char *key = "libs/fs/statvfs"; + const char *test_c = + NL "#include " + NL "int main()" + NL "{" + NL " struct statvfs statvfs_buf;" + NL " if (statvfs(\".\", &statvfs_buf) == 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for statvfs... 
"); + logprintf(logdepth, "find_fs_statvfs: trying to find statvfs()...\n"); + logdepth++; + + if (try_icl(logdepth, key, test_c, "#include ", NULL, NULL)) return 0; + return try_fail(logdepth, key); +} + +int find_fs_flock(const char *name, int logdepth, int fatal) +{ + const char *key = "libs/fs/flock"; + const char *test_c = + NL "#include " + NL "int main()" + NL "{" + NL " if (flock(1, LOCK_UN) == 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for flock... "); + logprintf(logdepth, "find_fs_flock: trying to find flock()...\n"); + logdepth++; + + if (try_icl(logdepth, key, test_c, "#include ", NULL, NULL)) return 0; + return try_fail(logdepth, key); +} diff --git a/scconfig/src/default/find_fstools.c b/scconfig/src/default/find_fstools.c new file mode 100644 index 0000000..5dc894d --- /dev/null +++ b/scconfig/src/default/find_fstools.c @@ -0,0 +1,833 @@ +/* + scconfig - detection of file system tools + Copyright (C) 2009..2012 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" +#include "dep.h" + +static int test_cp_ln(int logdepth, const char *command, int link) +{ + char *src, *dst, *src_esc, *dst_esc; + char *cmd, *result; + char *test_string = "works."; + int ret; + + logprintf(logdepth, "trying '%s'\n", command); + + src = tempfile_dump(test_string, ""); + dst = tempfile_new(""); + if (link) + unlink(dst); + + src_esc = shell_escape_dup(src); + dst_esc = shell_escape_dup(dst); + cmd = malloc(strlen(command) + strlen(src_esc) + strlen(dst_esc) + 32); + sprintf(cmd, "%s %s %s", command, src_esc, dst_esc); + run_shell(logdepth, cmd, NULL); + free(cmd); + free(src_esc); + free(dst_esc); + + result = load_file(dst); + ret = !strcmp(result, test_string); + logprintf(logdepth+1, "result: '%s' == '%s' (%d)\n", result, test_string, ret); + free(result); + + unlink(src); + free(src); + + result = load_file(dst); + if (link) { + if (strcmp(result, test_string) == 0) { + report("Warning: link is copy (or hard link). "); + logprintf(logdepth+1, "Warning: link is copy (or hard link).\n"); + } + } + else { + if (strcmp(result, test_string) != 0) { + report("Warning: copy is symlink. 
"); + logprintf(logdepth+1, "Warning: copy is symlink.\n"); + } + } + free(result); + + if (ret) { + if (link) + put("fstools/ln", command); + else + put("fstools/cp", command); + + report("OK (%s)\n", command); + } + + unlink(dst); + free(dst); + return ret; +} + +static int test_mv(int logdepth, const char *command) +{ + char *src, *dst, *src_esc, *dst_esc; + char *cmd, *result; + char *test_string = "works."; + int ret; + + logprintf(logdepth, "trying '%s'\n", command); + + src = tempfile_dump(test_string, ""); + dst = tempfile_new(""); + unlink(dst); + + src_esc = shell_escape_dup(src); + dst_esc = shell_escape_dup(dst); + cmd = malloc(strlen(command) + strlen(src_esc) + strlen(dst_esc) + 32); + sprintf(cmd, "%s %s %s", command, src_esc, dst_esc); + run_shell(logdepth, cmd, NULL); + free(cmd); + free(src_esc); + free(dst_esc); + + result = load_file(dst); + ret = !strcmp(result, test_string); + logprintf(logdepth+1, "result: '%s' == '%s' (%d)\n", result, test_string, ret); + free(result); + + if (file_size(src) > 0) { + report("Warning: mv is copy. 
"); + logprintf(logdepth+1, "Warning: mv is copy.\n"); + } + + if (ret) { + put("fstools/mv", command); + report("OK (%s)\n", command); + } + + unlink(dst); + unlink(src); + free(dst); + free(src); + return ret; +} + +static int test_mkdir(int logdepth, const char *command) +{ + char *dir, *file; + char *dir_esc; + char *cmd, *result; + char *test_string = "works."; + int ret = 0, had_p; + FILE *f; + + logprintf(logdepth, "trying '%s'\n", command); + dir = tempfile_new(""); + dir_esc = shell_escape_dup(dir); + unlink(dir); + + had_p = is_dir("-p"); + + cmd = malloc(strlen(command) + strlen(dir_esc) + 32); + sprintf(cmd, "%s %s", command, dir_esc); + run_shell(logdepth, cmd, NULL); + free(cmd); + + file = malloc(strlen(dir) + 32); + sprintf(file, "%s/test", dir); + f = fopen(file, "w"); + if (f != NULL) { + fputs(test_string, f); + fclose(f); + result = load_file(file); + if (strcmp(result, test_string) == 0) + ret = 1; + free(result); + } + + unlink(file); + unlink(dir); + + cmd = malloc(strlen(dir) + 32); + sprintf(cmd, "rmdir %s", dir_esc); + run_shell(logdepth, cmd, NULL); + free(cmd); + + free(file); + free(dir); + free(dir_esc); + + /* This is a bit ugly, but on win32 or other systems where mkdir works + but -p doesn't have an effect, a directory called -p may be left over... 
*/ + if ((!had_p) && (is_dir("-p"))) { + unlink("-p"); + return 0; + } + + if (ret != 0) { + put("fstools/mkdir", command); + report("OK (%s)\n", command); + } + + return ret; +} + +static int test_rm(int logdepth, const char *command) +{ + char *src, *src_esc, *cmd, *test_string = "works."; + int ret; + + logprintf(logdepth, "trying '%s'\n", command); + + src = tempfile_dump(test_string, ""); + + if (file_size(src) < 0) { + report("error: can't create temp file\n"); + free(src); + return 0; + } + + + src_esc = shell_escape_dup(src); + cmd = malloc(strlen(command) + strlen(src_esc) + 32); + sprintf(cmd, "%s %s", command, src_esc); + run_shell(logdepth, cmd, NULL); + free(cmd); + free(src_esc); + + ret = file_size(src) < 0; + + if (ret) { + put("fstools/rm", command); + report("OK (%s)\n", command); + } + else + unlink(src); + + free(src); + return ret; +} + +static int test_ar(int logdepth, const char *command) +{ + char *src, *dst, *src_esc, *dst_esc; + char *cmd, *result, *expected; + char *test_string = "works."; + const char *path_sep; + int ret = 0; + + logprintf(logdepth, "trying '%s'\n", command); + path_sep = get("sys/path_sep"); + + src = tempfile_dump(test_string, ""); + dst = tempfile_new(""); + unlink(dst); + + src_esc = shell_escape_dup(src); + dst_esc = shell_escape_dup(dst); + cmd = malloc(strlen(command) + strlen(src_esc) + strlen(dst_esc) + 32); + sprintf(cmd, "%s ru %s %s", command, dst_esc, src_esc); + run_shell(logdepth, cmd, NULL); + sprintf(cmd, "%s t %s", command, dst_esc); + run_shell(logdepth, cmd, &result); + free(cmd); + free(dst_esc); + free(src_esc); + + if (result != NULL) { + expected = str_rchr(src, *path_sep); + if (expected == NULL) + expected = src; + else + expected++; + + ret = strncmp(expected, result, strlen(expected)) == 0; + if (ret) { + put("fstools/ar", command); + report("OK (%s)\n", command); + } + free(result); + } + + unlink(src); + unlink(dst); + free(src); + free(dst); + return ret; +} + +static int test_ranlib(int 
logdepth, const char *command, const char *obj) +{ + char *cmd, *archive, *archive_esc, *obj_esc; + const char *ar; + int ret; + ar = get("fstools/ar"); + logprintf(logdepth, "trying '%s'\n", command); + + archive = tempfile_new(".a"); + archive_esc = shell_escape_dup(archive); + obj_esc = shell_escape_dup(obj); + cmd = malloc(strlen(command) + strlen(obj_esc) + strlen(archive_esc) + 64); + + sprintf(cmd, "%s r %s %s", ar, archive_esc, obj_esc); + unlink(archive); + ret = run_shell(logdepth, cmd, NULL) == 0; + if (!ret) + goto fin; + + sprintf(cmd, "%s %s", command, archive_esc); + ret = run_shell(logdepth, cmd, NULL) == 0; + + if (ret) { + put("fstools/ranlib", command); + report("OK (%s)\n", command); + } + +fin:; + unlink(archive); + free(archive); + free(cmd); + free(archive_esc); + free(obj_esc); + return ret; +} + +static int test_awk(int logdepth, const char *command) +{ + char cmd[1024]; + char *out; + int ret = 0; + char *script, *script_esc; + + /* For some reason windows awk doesn't like the code with NLs */ + char *test_awk = + "BEGIN {" + " gsub(\"b\", \"B\", t);" + " print t;" + "}"; + + logprintf(logdepth, "trying '%s'\n", command); + script = tempfile_dump(test_awk, ".awk"); + script_esc = shell_escape_dup(script); + sprintf(cmd, "%s -v \"t=blobb\" -f %s", command, script_esc); + free(script_esc); + run_shell(logdepth, cmd, &out); + unlink(script); + free(script); + + if ((out != NULL) && (strncmp(out, "BloBB", 5) == 0)) { + put("fstools/awk", command); + report("OK (%s)\n", command); + ret = 1; + } + + free(out); + return ret; +} + +static int test_cat(int logdepth, const char *command) +{ + char cmd[1024]; + char *out; + int ret = 0; + char *fn, *fn_esc; + const char *test_str = "hello world"; + + logprintf(logdepth, "trying '%s'\n", command); + fn = tempfile_dump(test_str, ".txt"); + fn_esc = shell_escape_dup(fn); + sprintf(cmd, "%s %s", command, fn_esc); + run_shell(logdepth, cmd, &out); + unlink(fn); + free(fn); + free(fn_esc); + + if ((out != 
NULL) && (strncmp(out, test_str, strlen(test_str)) == 0)) { + put("fstools/cat", command); + report("OK (%s)\n", command); + ret = 1; + } + + free(out); + return ret; +} + +static int test_sed(int logdepth, const char *command) +{ + char cmd[1024]; + char *out; + int ret = 0; + char *fn, *fn_esc; + const char *test_str_in = "hello world"; + const char *test_str_out = "he11o wor1d"; + + logprintf(logdepth, "trying '%s'\n", command); + fn = tempfile_dump(test_str_in, ".txt"); + fn_esc = shell_escape_dup(fn); + sprintf(cmd, "%s \"s/l/1/g\" < %s", command, fn_esc); + run_shell(logdepth, cmd, &out); + unlink(fn); + free(fn); + free(fn_esc); + + if ((out != NULL) && (strncmp(out, test_str_out, strlen(test_str_out)) == 0)) { + put("fstools/sed", command); + report("OK (%s)\n", command); + ret = 1; + } + + free(out); + return ret; +} + +static int test_chmodx(int logdepth, const char *command) +{ + char *cmd, *tmp, *tmp_esc, *out, *s; + int ret; + + logprintf(logdepth, "trying '%s'\n", command); + tmp = tempfile_dump("#!/bin/sh\necho OK\n", ".bat"); + + tmp_esc = shell_escape_dup(tmp); + cmd = malloc(strlen(command) + strlen(tmp_esc) + 16); + sprintf(cmd, "%s %s", command, tmp_esc); + ret = run_shell(logdepth, cmd, NULL) == 0; + free(cmd); + if (!ret) { + free(tmp_esc); + return ret; + } + + ret = run(logdepth+1, tmp_esc, &out); + free(tmp_esc); + + if (ret == 0) { + for(s = out; s != NULL; s = str_chr(s, '\n')) { + logprintf(logdepth+1, "chmod line to test: '%s'\n", s); + if ((s[0] == 'O') && (s[1] == 'K')) { + logprintf(logdepth+2, "(OK)\n"); + ret = 1; + break; + } + s++; + } + } + else + ret = 0; + + free(out); + if (ret) { + put("fstools/chmodx", command); + logprintf(logdepth, "chmodx command validated: '%s'\n", command); + report("OK (%s)\n", command); + } + unlink(tmp); + return ret; +} + +static int test_file(int logdepth, const char *node, const char *command) +{ + char cmd[1024]; + char *out; + int ret = 0; + char *fn, *fn_esc; + + logprintf(logdepth, "trying 
'%s'\n", command); + fn = tempfile_dump("plain text file\r\n", ".txt"); + fn_esc = shell_escape_dup(fn); + sprintf(cmd, "%s %s", command, fn_esc); + run_shell(logdepth, cmd, &out); + unlink(fn); + free(fn); + free(fn_esc); + + if ((out != NULL) && (strstr(out, "text") != NULL)) { + put(node, command); + report("OK (%s)\n", command); + ret = 1; + } + + free(out); + return ret; +} + +int find_fstools_cp(const char *name, int logdepth, int fatal) +{ + const char *cp; + + (void) fatal; /* to suppress compiler warnings about not using fatal */ + + report("Checking for cp... "); + logprintf(logdepth, "find_fstools_cp: trying to find cp...\n"); + logdepth++; + + cp = get("/arg/fstools/cp"); + if (cp == NULL) { + if (test_cp_ln(logdepth, "cp -rp", 0)) return 0; + if (test_cp_ln(logdepth, "cp -r", 0)) return 0; + if (test_cp_ln(logdepth, "copy /r", 0)) return 0; /* wine */ + } + else { + report(" user provided (%s)...", cp); + if (test_cp_ln(logdepth, cp, 0)) return 0; + } + return 1; +} + +int find_fstools_ln(const char *name, int logdepth, int fatal) +{ + const char *ln; + + (void) fatal; /* to suppress compiler warnings about not using fatal */ + + report("Checking for ln... "); + logprintf(logdepth, "find_fstools_ln: trying to find ln...\n"); + logdepth++; + + ln = get("/arg/fstools/ln"); + if (ln == NULL) { + if (test_cp_ln(logdepth, "ln -sf",1 )) return 0; + if (test_cp_ln(logdepth, "ln -s",1 )) return 0; + if (test_cp_ln(logdepth, "ln", 1)) return 0; + /* "mklink /H" -> win32 equivalent to "ln" */ + /* "cp -s" -> same as "ln -s" */ + /* "cp -l" -> same as "ln" */ + if (test_cp_ln(logdepth, "cp", 1)) return 0; + } + else { + report(" user provided (%s)...", ln); + if (test_cp_ln(logdepth, ln, 1)) return 0; + } + return 1; +} + +int find_fstools_mv(const char *name, int logdepth, int fatal) +{ + const char *mv; + + (void) fatal; /* to suppress compiler warnings about not using fatal */ + + report("Checking for mv... 
"); + logprintf(logdepth, "find_fstools_mv: trying to find mv...\n"); + logdepth++; + + mv = get("/arg/fstools/mv"); + if (mv == NULL) { + if (test_mv(logdepth, "mv")) return 0; + if (test_mv(logdepth, "move")) return 0; /* win32 */ + if (test_mv(logdepth, "cp")) return 0; + } + else { + report(" user provided (%s)...", mv); + if (test_mv(logdepth, mv)) return 0; + } + return 1; +} + +int find_fstools_rm(const char *name, int logdepth, int fatal) +{ + const char *rm; + + (void) fatal; /* to suppress compiler warnings about not using fatal */ + + report("Checking for rm... "); + logprintf(logdepth, "find_fstools_rm: trying to find rm...\n"); + logdepth++; + + rm = get("/arg/fstools/rm"); + if (rm == NULL) { + if (test_rm(logdepth, "rm -rf")) return 0; + if (test_rm(logdepth, "rm -f")) return 0; + if (test_rm(logdepth, "rm")) return 0; + if (test_rm(logdepth, "del")) return 0; /* for win32 */ + } + else { + report(" user provided (%s)...", rm); + if (test_rm(logdepth, rm)) return 0; + } + return 1; +} + +int find_fstools_mkdir(const char *name, int logdepth, int fatal) +{ + const char *mkdir; + + (void) fatal; /* to suppress compiler warnings about not using fatal */ + + report("Checking for mkdir... "); + logprintf(logdepth, "find_fstools_mkdir: trying to find mkdir...\n"); + logdepth++; + + mkdir = get("/arg/fstools/mkdir"); + if (mkdir == NULL) { + if (test_mkdir(logdepth, "mkdir -p")) return 0; + if (test_mkdir(logdepth, "md")) return 0; /* for win32 */ + } + else { + report(" user provided (%s)...", mkdir); + if (test_mkdir(logdepth, mkdir)) return 0; + } + return 1; +} + +int find_fstools_ar(const char *name, int logdepth, int fatal) +{ + const char *ar, *target; + char *targetar; + int len; + + (void) fatal; /* to suppress compiler warnings about not using fatal */ + + require("sys/path_sep", logdepth, fatal); + + + report("Checking for ar... 
"); + logprintf(logdepth, "find_fstools_ar: trying to find ar...\n"); + logdepth++; + + ar = get("/arg/fstools/ar"); + if (ar == NULL) { + target = get("/arg/sys/target"); + if (target != NULL) { + logprintf(logdepth+1, "find_ar: crosscompiling for '%s', looking for target ar\n", target); + len = strlen(target); + targetar = malloc(len + 8); + memcpy(targetar, target, len); + strcpy(targetar + len, "-ar"); + if (test_ar(logdepth, targetar)) { + free(targetar); + return 0; + } + free(targetar); + } + if (test_ar(logdepth, "ar")) return 0; + if (test_ar(logdepth, "/usr/bin/ar")) return 0; + } + else { + report(" user provided (%s)...", ar); + if (test_ar(logdepth, ar)) return 0; + } + return 1; +} + +int find_fstools_ranlib(const char *name, int logdepth, int fatal) +{ + const char *ranlib, *target; + char *targetranlib; + int len; + char *test_code = NL "int zero() { return 0; }" NL; + char *obj = ".o"; + + (void) fatal; /* to suppress compiler warnings about not using fatal */ + + require("fstools/ar", logdepth, fatal); + require("cc/cc", logdepth, fatal); + + report("Checking for ranlib... 
"); + logprintf(logdepth, "find_fstools_ranlib: trying to find ranlib...\n"); + logdepth++; + + logprintf(logdepth, "compiling test object...\n"); + if (compile_code(logdepth+1, test_code, &obj, NULL, "-c", NULL) != 0) { + logprintf(logdepth, "ERROR: Can't compile test object\n"); + report("ERROR: Can't compile test object\n"); + abort(); + } + + ranlib = get("/arg/fstools/ranlib"); + if (ranlib == NULL) { + target = get("/arg/sys/target"); + if (target != NULL) { + logprintf(logdepth+1, "find_ranlib: crosscompiling for '%s', looking for target ranlib\n", target); + len = strlen(target); + targetranlib = malloc(len + 16); + memcpy(targetranlib, target, len); + strcpy(targetranlib + len, "-ranlib"); + if (test_ranlib(logdepth, targetranlib, obj)) { + free(targetranlib); + return 0; + } + free(targetranlib); + } + if (test_ranlib(logdepth, "ranlib", obj)) goto found; + if (test_ranlib(logdepth, "/usr/bin/ranlib", obj)) goto found; + if (test_ranlib(logdepth, "ar -s", obj)) goto found; + if (test_ranlib(logdepth, "/usr/bin/ar -s", obj)) goto found; + + /* some systems (for example IRIX) can't run s without doing + something else; t is harmless */ + if (test_ranlib(logdepth, "ar ts", obj)) goto found; + if (test_ranlib(logdepth, "/usr/bin/ar ts", obj)) goto found; + + /* final fallback: some systems (for example minix3) simply + do not have ranlib or ar equivalent; it's easier to detect + a dummy command than to force conditions into Makefiles */ + if (test_ranlib(logdepth, "true", obj)) goto found; + } + else { + report(" user provided (%s)...", ranlib); + if (test_ranlib(logdepth, ranlib, obj)) goto found; + } + unlink(obj); + free(obj); + return 1; +found:; + unlink(obj); + free(obj); + return 0; +} + +int find_fstools_awk(const char *name, int logdepth, int fatal) +{ + const char *awk; + + (void) fatal; /* to suppress compiler warnings about not using fatal */ + + report("Checking for awk... 
"); + logprintf(logdepth, "find_fstools_awk: trying to find awk...\n"); + logdepth++; + + awk = get("/arg/fstools/awk"); + if (awk == NULL) { + if (test_awk(logdepth, "awk")) return 0; + if (test_awk(logdepth, "gawk")) return 0; + if (test_awk(logdepth, "mawk")) return 0; + if (test_awk(logdepth, "nawk")) return 0; + } + else { + report(" user provided (%s)...", awk); + if (test_awk(logdepth, awk)) return 0; + } + return 1; +} + +int find_fstools_chmodx(const char *name, int logdepth, int fatal) +{ + const char *chmod; + + (void) fatal; /* to suppress compiler warnings about not using fatal */ + + report("Checking for chmod to executable... "); + logprintf(logdepth, "find_fstools_awk: trying to find chmod to executable...\n"); + logdepth++; + + chmod = get("/arg/fstools/chmodx"); + if (chmod == NULL) { + if (test_chmodx(logdepth, "chmod +x")) return 0; + if (test_chmodx(logdepth, "chmod 755")) return 0; + if (test_chmodx(logdepth, "")) return 0; /* on some systems we don't need to do anything */ + } + else { + report(" user provided (%s)...", chmod); + if (test_chmodx(logdepth, chmod)) return 0; + } + return 1; +} + +int find_fstools_cat(const char *name, int logdepth, int fatal) +{ + const char *cat; + + (void) fatal; /* to suppress compiler warnings about not using fatal */ + + report("Checking for cat... "); + logprintf(logdepth, "find_fstools_cat: trying to find cat...\n"); + logdepth++; + + cat = get("/arg/fstools/cat"); + if (cat == NULL) { + if (test_cat(logdepth, "cat")) return 0; + if (test_cat(logdepth, "type")) return 0; + } + else { + report(" user provided (%s)...", cat); + if (test_cat(logdepth, cat)) return 0; + } + return 1; +} + +int find_fstools_sed(const char *name, int logdepth, int fatal) +{ + const char *sed; + + (void) fatal; /* to suppress compiler warnings about not using fatal */ + + report("Checking for sed... 
"); + logprintf(logdepth, "find_fstools_sed: trying to find sed...\n"); + logdepth++; + + sed = get("/arg/fstools/sed"); + if (sed == NULL) { + if (test_sed(logdepth, "sed")) return 0; + } + else { + report(" user provided (%s)...", sed); + if (test_sed(logdepth, sed)) return 0; + } + return 1; +} + +int find_fstools_file_l(const char *name, int logdepth, int fatal) +{ + const char *file; + + (void) fatal; /* to suppress compiler warnings about not using fatal */ + + report("Checking for file... "); + logprintf(logdepth, "find_fstools_file_l: trying to find file -L...\n"); + logdepth++; + + file = get("/arg/fstools/file_l"); + if (file == NULL) { + if (test_file(logdepth, "fstools/file_l", "file -L")) return 0; + if (test_file(logdepth, "fstools/file_l", "file")) return 0; + } + else { + report(" user provided (%s)...", file); + if (test_file(logdepth, "fstools/file_l", file)) return 0; + } + return 1; +} + +int find_fstools_file(const char *name, int logdepth, int fatal) +{ + const char *file; + + (void) fatal; /* to suppress compiler warnings about not using fatal */ + + report("Checking for file... "); + logprintf(logdepth, "find_fstools_file: trying to find file...\n"); + logdepth++; + + file = get("/arg/fstools/file"); + if (file == NULL) { + if (test_file(logdepth, "fstools/file", "file")) return 0; + } + else { + report(" user provided (%s)...", file); + if (test_file(logdepth, "fstools/file", file)) return 0; + } + return 1; +} diff --git a/scconfig/src/default/find_io.c b/scconfig/src/default/find_io.c new file mode 100644 index 0000000..c94a3d6 --- /dev/null +++ b/scconfig/src/default/find_io.c @@ -0,0 +1,189 @@ +/* + scconfig - detect I/O features of the system + Copyright (C) 2010 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +int find_io_pipe(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "int main() {" + NL " int fd[2];" + NL " if (pipe(fd) == 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for pipe(2)... "); + logprintf(logdepth, "find_io_pipe: trying to find pipe(2)...\n"); + logdepth++; + + + if (try_icl(logdepth, "libs/io/pipe", test_c, NULL, NULL, NULL)) return 0; + return try_fail(logdepth, "libs/io/pipe"); +} + +int find_io_dup2(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "int main() {" + NL " int fd;" + NL " if (dup2(1, 4) == 4)" + NL " write(4, \"OK\\n\", 3); " + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for dup2(2)... 
"); + logprintf(logdepth, "find_io_dup2: trying to find dup2(2)...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/io/dup2", test_c, NULL, NULL, NULL)) return 0; + return try_fail(logdepth, "libs/io/dup2"); +} + + +int find_io_fileno(const char *name, int logdepth, int fatal) +{ + char test_c[256]; + char *test_c_ = + NL "#include " + NL "int main() {" + NL no_implicit(int, "%s", "%s") + NL " if (%s(stdout) >= 0)" + NL " puts(\"OK\"); " + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for fileno(3)... "); + logprintf(logdepth, "find_io_fileno: trying to find fileno(3)...\n"); + logdepth++; + + /* UNIX */ + sprintf(test_c, test_c_, "fileno", "fileno", "fileno"); + if (try_icl(logdepth, "libs/io/fileno", test_c, "#include \n", NULL, NULL)) { + put("libs/io/fileno/call", "fileno"); + return 0; + } + + sprintf(test_c, test_c_, "fileno", "fileno", "fileno"); + if (try_icl(logdepth, "libs/io/fileno", test_c, "#define _XOPEN_SOURCE\n#include \n", NULL, NULL)) { + put("libs/io/fileno/call", "fileno"); + return 0; + } + + /* windows */ + sprintf(test_c, test_c_, "_fileno", "_fileno", "_fileno"); + if (try_icl(logdepth, "libs/io/fileno", test_c, "#include \n", NULL, NULL)) { + put("libs/io/fileno/call", "_fileno"); + return 0; + } + + return try_fail(logdepth, "libs/io/fileno"); +} + +int find_io_lseek(const char *name, int logdepth, int fatal) +{ +#define NODE "libs/io/lseek" + + char test_c[3256]; + const char *test_c_template = + NL "#include " + NL "#include " + NL "int main() {" + NL " const char *filename = \"%s\";" + NL no_implicit(int, "%s", "%s") + NL " int fd = open(filename, O_WRONLY);" + NL " if (write(fd, \"abc\", 3) == 3 && %s(fd, 1, SEEK_SET) == 1)" + NL " puts(\"OK\"); " + NL " return 0;" + NL "}" + NL; + char *tmpf; + const char *incs[] = {"#include ","#include ",NULL}; + const char *fns[] = {"lseek", "_lseek", NULL}; + const char **inc; + const char **fn; + + require("cc/cc", logdepth, fatal); + + 
report("Checking for lseek(2)... "); + logprintf(logdepth, "find_io_lseek: trying to find lseek(2)...\n"); + logdepth++; + + tmpf = tempfile_new(".psx"); + + for (inc = incs, fn = fns; *fn; ++inc, ++fn) { + sprintf(test_c, test_c_template, tmpf, *fn, *fn, *fn); + if (try_icl(logdepth, NODE, test_c, *inc, NULL, NULL)) { + put(NODE "/call", *fn); + return 0; + } + } + + return try_fail(logdepth, NODE); +#undef NODE +} + +int find_io_popen(const char *name, int logdepth, int fatal) +{ + const char **i, *incs[] = {"#define _XOPEN_SOURCE", "#define _BSD_SOURCE", "#define _POSIX_C_SOURCE 2", NULL}; + char *test_c = + NL "#include " + NL "#include " + NL "int main() {" + NL " FILE *f = popen(\"echo OK\", \"r\");" + NL " char line[16];" + NL " if (f == NULL) return 0;" + NL " if (fgets(line, sizeof(line)-1, f) == NULL) return 0;" + NL " puts(line);" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for popen(3)... "); + logprintf(logdepth, "find_io_popen: trying to find popen(3)...\n"); + logdepth++; + + for(i = incs; *i != NULL; i++) + if (try_icl(logdepth, "libs/io/popen", test_c, *i, NULL, NULL)) return 0; + return try_fail(logdepth, "libs/io/popen"); +} + diff --git a/scconfig/src/default/find_libs.c b/scconfig/src/default/find_libs.c new file mode 100644 index 0000000..4ba66f1 --- /dev/null +++ b/scconfig/src/default/find_libs.c @@ -0,0 +1,194 @@ +/* + scconfig - detection of standard library features + Copyright (C) 2009 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +static int trydlc(int logdepth, const char *test_c_dlc, const char *cflagsf, const char *ldflagsf, const char *dlc) +{ + char *cflags, *ldflags; + + cflags = malloc(strlen(dlc) + 64); + ldflags = malloc(strlen(dlc)*2 + 256); + sprintf(cflags, cflagsf, dlc); + sprintf(ldflags, ldflagsf, dlc, dlc); + if (try_icl(logdepth, NULL, test_c_dlc, NULL, cflags, ldflags)) { + *cflags = ' '; + append("cc/cflags", cflags); + put("libs/ldl", ldflags); + put("libs/dl-compat", strue); + report("OK (%s and %s)\n", cflags, ldflags); + free(cflags); + free(ldflags); + return 1; + } + free(cflags); + free(ldflags); + return 0; +} + +int find_lib_ldl(const char *name, int logdepth, int fatal) +{ + const char *ldl, *dlc; + char *s; + + char *test_c = + NL "#include " + NL "#include " + NL "int main() {" + NL " void *handle;" + NL " handle = dlopen(\"/this file does not exist.\", RTLD_NOW);" + NL " if (handle == NULL) printf(\"OK\\n\");" + NL " return 0;" + NL "}" + NL; + + char *test_c_dlc = + NL "#include " + NL "#include " + NL "int main() {" + NL " void *handle;" + NL " handle = dlopen(\"/this file does not exist.\", RTLD_NOW);" + NL " if (handle == NULL) printf(\"OK\\n\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for -ldl... 
"); + logprintf(logdepth, "find_lib_ldl: trying to find ldl...\n"); + logdepth++; + + ldl = get("/arg/libs/ldl"); + if (ldl == NULL) { + dlc = get("/arg/libs/dl-compat"); + + if (dlc == NULL) { + /* If dlc is not explicitly requested by the user, try standard + dl (see whether we need -ldl for dlopen()) */ + if (try_icl(logdepth, NULL, test_c, NULL, NULL, NULL)) { + put("libs/ldl", ""); + put("libs/ldl/includes", "#include \\n"); + report("OK ()\n"); + return 0; + } + if (try_icl(logdepth, NULL, test_c, NULL, NULL, "-ldl")) { + put("libs/ldl", "-ldl"); + put("libs/ldl/includes", "#include \\n"); + report("OK (-ldl)\n"); + return 0; + } + } + /* try dl-compat (dl compatibility lib) */ + if (dlc != NULL) { + /* test at user supplied dlc prefix: + - first assume the linker will find it + - next assume gcc and pass rpath to the linker + - finally try static linking */ + if (trydlc(logdepth, test_c_dlc, "-I%s/include", "-L%s/lib -ldl-compat\000%s", dlc)) { + put("libs/ldl/includes", "#include \\n"); + return 0; + } + if (trydlc(logdepth, test_c_dlc, "-I%s/include", "-L%s/lib -Wl,-rpath=%s/lib -ldl-compat", dlc)) { + put("libs/ldl/includes", "#include \\n"); + return 0; + } + if (trydlc(logdepth, test_c_dlc, "-I%s/include", "%s/lib/libdl-compat.a\000%s", dlc)) { + put("libs/ldl/includes", "#include \\n"); + return 0; + } + } + else if (try_icl(logdepth, NULL, test_c_dlc, NULL, NULL, "-ldl-compat")) { + /* check at normal system installation */ + put("libs/ldl", "-ldl-compat"); + put("libs/ldl/includes", "#include \\n"); + report("OK (-ldl-compat)\n"); + return 0; + } + } + else { + report("User provided... 
"); + s = strclone(ldl); + if (try_icl(logdepth, NULL, test_c, NULL, NULL, s)) { + put("libs/ldl", ldl); + put("libs/ldl/includes", "#include \\n"); + report("OK (%s)\n", ldl); + free(s); + return 0; + } + free(s); + } + report("Not found\n"); + return 1; +} + +int find_lib_LoadLibrary(const char *name, int logdepth, int fatal) +{ + /*char *s;*/ + + char *test_c = + NL "#include " + NL "int main() {" + NL " void *handle;" + NL " handle = LoadLibrary(\"/this file does not exist.\");" + NL " if (handle == NULL) printf(\"OK\\n\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for LoadLibrary... "); + logprintf(logdepth, "find_lib_LoadLibrary: trying to find LoadLibrary...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/LoadLibrary", test_c, "#include ", NULL, NULL)) + return 0; + return try_fail(logdepth, "libs/LoadLibrary"); +} + +int find_lib_errno(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "int main() {" + NL " errno = 0;" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL ; + + require("cc/cc", logdepth, fatal); + + report("Checking for errno.h... "); + logprintf(logdepth, "find_lib_errno: trying to find errno...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/errno", test_c, NULL, NULL, NULL)) return 0; + return try_fail(logdepth, "libs/errno"); +} diff --git a/scconfig/src/default/find_printf.c b/scconfig/src/default/find_printf.c new file mode 100644 index 0000000..1aeb782 --- /dev/null +++ b/scconfig/src/default/find_printf.c @@ -0,0 +1,318 @@ +/* + scconfig - detection of printf-related features + Copyright (C) 2009 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +static int tryx(int logdepth, const char *test_c, const char *trying, const char *expected) +{ + char *out = NULL; + char buff[512]; + + logprintf(logdepth, "trying '%s'\n", trying); + sprintf(buff, test_c, trying, trying); + if (compile_run(logdepth+1, buff, NULL, NULL, NULL, &out) == 0) { + if (strncmp(out, expected, strlen(expected)) == 0) { + free(out); + return 1; + } + free(out); + } + return 0; +} + +static int tryc(int logdepth, const char *test_c, const char *trying) +{ + char *out = NULL; + char buff[512]; + char *spc, *end; + + logprintf(logdepth, "trying '%s'\n", trying); + sprintf(buff, test_c, trying); + if (compile_run(logdepth+1, buff, NULL, NULL, NULL, &out) == 0) { + spc = str_chr(out, ' '); + if (spc == NULL) + return 0; + *spc = '\0'; + spc++; + end = str_chr(spc, ' '); + if (end == NULL) + return 0; + *end = '\0'; + if (strcmp(out, spc) == 0) { + free(out); + put("libs/printf_ptrcast", trying); + report("OK (%s)\n", trying); + return 1; + } + free(out); + } + return 0; +} + +int find_printf_x(const char *name, int logdepth, int fatal) +{ + const char *pfx; + char *test_c = + NL "#include " + NL "#include " + NL "int main() {" + NL " printf(\"'%s%%x'/'%s%%x'\\n\", (size_t)0x1234, NULL);" + NL " return 0;" + NL "}" + NL; + char *expected = "'0x1234'/'0x0'"; + + require("cc/cc", 
logdepth, fatal); + + report("Checking for printf %%x prefix... "); + logprintf(logdepth, "find_printf_x: trying to find printf %%x prefix...\n"); + logdepth++; + + pfx = get("/arg/libs/printf_x"); + if (pfx == NULL) { + + if (tryx(logdepth, test_c, "", expected)) { + put("libs/printf_x", ""); + report("OK ()\n"); + return 0; + } + if (tryx(logdepth, test_c, "0x", expected)) { + put("libs/printf_x", "0x"); + report("OK (0x)\n"); + return 0; + } + } + else { + report("User provided... "); + if (tryx(logdepth, test_c, pfx, expected)) { + put("libs/printf_x", pfx); + report("OK (%s)\n", pfx); + return 0; + } + } + return 1; +} + +int find_printf_ptrcast(const char *name, int logdepth, int fatal) +{ + const char *cast; + char *test_c = + NL "#include " + NL "#include " + NL "int main() {" + NL " printf(\"%%d %%d \\n\", sizeof(void *), sizeof(%s));" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for printf %%x pointer cast... "); + logprintf(logdepth, "find_printf_ptrcast: trying to find printf %%x pointer cast...\n"); + logdepth++; + + cast = get("/arg/libs/printf_ptrcast"); + if (cast == NULL) { + if (tryc(logdepth, test_c, "unsigned int")) return 0; + if (tryc(logdepth, test_c, "unsigned long")) return 0; + if (tryc(logdepth, test_c, "unsigned long long")) return 0; + } + else { + report("User provided... 
"); + if (tryc(logdepth, test_c, cast)) return 0; + } + return 1; +} + +int find_snprintf(const char *name, int logdepth, int fatal) +{ + char *out; + char *test_c = + NL "#include " + NL "#include " + NL "int main() {" + NL " char buff[9];" + NL " char *s = buff+2;" + NL + NL " /* build a fence */" + NL " buff[0] = 0;" + NL " buff[1] = 65;" + NL " buff[7] = 66;" + NL " buff[8] = 0;" + NL + NL " snprintf(s, 4, \"%d\", 123456);" + NL " if ((buff[0] == 0) && (buff[1] == 65) && (buff[7] == 65) && (buff[8] == 0))" + NL " printf(\"%s\\n\", s);" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for snprintf... "); + logprintf(logdepth, "find_snprintf_works: trying to find snprintf...\n"); + logdepth++; + logprintf(logdepth, "trying snprintf...\n"); + + if (compile_run(logdepth+1, test_c, NULL, NULL, NULL, &out) == 0) { + if (cross_blind) { + put("libs/snprintf", strue); + report("OK (can't check if safe)\n"); + free(out); + return 0; + } + if (strcmp(out, "123")) { + put("libs/snprintf", strue); + put("libs/snprintf_safe", strue); + report("OK (safe)\n"); + free(out); + return 0; + } + if (strcmp(out, "1234")) { + put("libs/snprintf", strue); + put("libs/snprintf_safe", sfalse); + report("OK (UNSAFE)\n"); + free(out); + return 0; + } + free(out); + report("not found (broken output).\n"); + } + else { + report("not found (no output).\n"); + } + put("libs/snprintf", sfalse); + return 1; +} + +int find_dprintf(const char *name, int logdepth, int fatal) +{ + char *out; + char *test_c = + NL "#include " + NL "#include " + NL "int main() {" + NL " dprintf(1, \"OK\\n\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for dprintf... 
"); + logprintf(logdepth, "find_dprintf: trying to find dprintf...\n"); + logdepth++; + logprintf(logdepth, "trying dprintf...\n"); + + if ((compile_run(logdepth+1, test_c, NULL, NULL, NULL, &out) == 0) && (strcmp(out, "OK"))) { + put("libs/dprintf", strue); + report("found\n"); + free(out); + return 0; + } + put("libs/dprintf", sfalse); + report("not found\n"); + return 1; +} + +int find_vdprintf(const char *name, int logdepth, int fatal) +{ + char *out; + char *test_c = + NL "#include " + NL "#include " + NL "#include " + NL "void local_dprintf(int fd, const char *fmt, ...)" + NL "{" + NL " va_list ap;" + NL " va_start(ap, fmt);" + NL " vdprintf(fd, fmt, ap);" + NL "}" + NL "int main() {" + NL " local_dprintf(1, \"OK\\n\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for vdprintf... "); + logprintf(logdepth, "find_vdprintf: trying to find vdprintf...\n"); + logdepth++; + logprintf(logdepth, "trying vdprintf...\n"); + + if ((compile_run(logdepth+1, test_c, NULL, NULL, NULL, &out) == 0) && (strcmp(out, "OK"))) { + put("libs/vdprintf", strue); + report("found\n"); + free(out); + return 0; + } + put("libs/vdprintf", sfalse); + report("not found\n"); + return 1; +} + +int find_vsnprintf(const char *name, int logdepth, int fatal) +{ + char *out; + char *test_c = + NL "#include " + NL "#include " + NL "#include " + NL "void local_vsnprintf(char *s, int len, const char *fmt, ...)" + NL "{" + NL " va_list ap;" + NL " va_start(ap, fmt);" + NL " vsnprintf(s, len, fmt, ap);" + NL "}" + NL "int main() {" + NL " char s[16];" + NL " *s = '\\0';" + NL " local_vsnprintf(s, 14, \"OK\\n\");" + NL " printf(\"%s\", s);" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for vsnprintf... 
"); + logprintf(logdepth, "find_vsnprintf: trying to find vsnprintf...\n"); + logdepth++; + logprintf(logdepth, "trying vsnprintf...\n"); + + if ((compile_run(logdepth+1, test_c, NULL, NULL, NULL, &out) == 0) && (strcmp(out, "OK"))) { + put("libs/vsnprintf", strue); + report("found\n"); + free(out); + return 0; + } + put("libs/vsnprintf", sfalse); + report("not found\n"); + return 1; +} diff --git a/scconfig/src/default/find_proc.c b/scconfig/src/default/find_proc.c new file mode 100644 index 0000000..fec68a4 --- /dev/null +++ b/scconfig/src/default/find_proc.c @@ -0,0 +1,137 @@ +/* + scconfig - detection of standard library features (processes) + Copyright (C) 2016 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +int find_proc__spawnvp(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "int main() {" + NL " const char *a[3] = {\"/c\", \"echo OK\", NULL};" + NL " _spawnvp(_P_WAIT, \"cmd\", a);" + NL " return 0;" + NL "}" + NL ; + + require("cc/cc", logdepth, fatal); + + report("Checking for _spawnvp... 
"); + logprintf(logdepth, "find_proc__spawnvp: trying to find _spawnvp...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/proc/_spawnvp", test_c, "#include ", NULL, NULL)) return 0; + + return try_fail(logdepth, "libs/proc/_spawnvp"); +} + + + +int find_proc_fork(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "#include " + NL "int main() {" + NL " if (fork() == 0) { return 0; }" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL ; + + /* NOTE: can't print OK from the child process because of a possible race + with the parent immediately exiting without wait(). */ + + require("cc/cc", logdepth, fatal); + + report("Checking for fork... "); + logprintf(logdepth, "find_proc_fork: trying to find fork...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/proc/fork", test_c, "#include ", NULL, NULL)) return 0; + + return try_fail(logdepth, "libs/proc/fork"); +} + + +int find_proc_wait(const char *name, int logdepth, int fatal) +{ + char *inc; + const char *inc1; + char test_c[1024]; + char *test_c_in = + NL "%s\n" + NL "#include " + NL "#include " + NL "int main() {" + NL " int st = 0;" + NL " if (fork() == 0) {" + NL " printf(\"O\");" + NL " return 42;" + NL " }" + NL " wait(&st);" + NL " if (WIFEXITED(st) && (WEXITSTATUS(st) == 42))" + NL " printf(\"K\");" + NL " else" + NL " printf(\"%%d\", st);" + NL " printf(\"\\n\");" + NL " return 0;" + NL "}" + NL ; + + require("cc/cc", logdepth, fatal); + if (require("libs/proc/fork", logdepth, fatal)) + return try_fail(logdepth, "libs/proc/wait"); + + report("Checking for wait... 
"); + logprintf(logdepth, "find_proc_wait: trying to find wait...\n"); + logdepth++; + + inc1 = get("libs/proc/fork/includes"); + if (inc1 != NULL) { + char *i, *o; + inc = strclone(inc1); + for(i = o = inc; *i != '\0'; i++,o++) { + if ((i[0] == '\\') && (i[1] == 'n')) { + *o = '\n'; + i++; + } + else + *o = *i; + } + *o = '\0'; + sprintf(test_c, test_c_in, inc); + free(inc); + } + else + sprintf(test_c, test_c_in, ""); + + if (try_icl(logdepth, "libs/proc/wait", test_c, "#include \n#include ", NULL, NULL)) return 0; + + return try_fail(logdepth, "libs/proc/wait"); +} diff --git a/scconfig/src/default/find_signal.c b/scconfig/src/default/find_signal.c new file mode 100644 index 0000000..c7f46f8 --- /dev/null +++ b/scconfig/src/default/find_signal.c @@ -0,0 +1,141 @@ +/* + scconfig - detection of standard library features + Copyright (C) 2009 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +static int try_bad(int logdepth, const char *test_c, char *cflags, char *ldflags) +{ + char *out = NULL; + + logprintf(logdepth, "trying signal (neg) with ldflags '%s'\n", ldflags == NULL ? 
get("cc/ldflags") : ldflags); + if (compile_run(logdepth+1, test_c, NULL, cflags, ldflags, &out) == 0) { + if (target_emu_fail(out) || (strncmp(out, "BAD", 3) == 0)) { + free(out); + return 0; + } + free(out); + } + return 1; +} + + +int find_signal_raise(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "#include " + NL "int main(int argc, char *argv[]) {" + NL " printf(\"OK\\n\");" + NL " if (argc == 16)" + NL " raise(1);" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for raise()... "); + logprintf(logdepth, "find_signal_raise: trying to find raise()...\n"); + logdepth++; + + if (try_icl(logdepth, "signal/raise", test_c, NULL, NULL, NULL)) + return 0; + return try_fail(logdepth, "signal/raise"); +} + + +int find_signal_names(const char *rname, int logdepth, int fatal) +{ + char *test_c_exists = + NL "#include " + NL "#include " + NL "int main(int argc, char *argv[]) {" + NL " printf(\"OK\\n\");" + NL " if (argc == 16)" + NL " raise(%s);" + NL " return 0;" + NL "}" + NL; + char *test_c_terms = + NL "#include " + NL "#include " + NL "int main() {" + NL " raise(%s);" + NL " printf(\"BAD\\n\");" + NL " return 0;" + NL "}" + NL; + char test_c[256]; + const char *names[] = {"SIGINT", "SIGABRT", "SIGKILL", "SIGTERM", "SIGQUIT", "SIGHUP", "SIGFPE", "SIGSEGV", "SIGPIPE", NULL}; + const char **name; + char path[256], *pathend; + const char *prefix = "signal/names/"; + + require("cc/cc", logdepth, fatal); + require("signal/raise/*", logdepth, fatal); + + strcpy(path, prefix); + pathend = path + strlen(prefix); + + for(name = names; *name != NULL; name++) { + /* check whether it exists */ + report("Checking whether %s exists... 
", *name); + logprintf(logdepth, "find_signal_names: checking whether %s exists\n", *name); + logdepth++; + sprintf(test_c, test_c_exists, *name); + strcpy(pathend, *name); + if (!try_icl(logdepth, path, test_c, NULL, NULL, NULL)) { + logdepth--; + continue; + } + + /* check whether it exists */ + logdepth--; + report("Checking whether %s terminates... ", *name); + logprintf(logdepth, "find_signal_names: checking whether %s terminates\n", *name); + logdepth++; + + sprintf(test_c, test_c_terms, *name); + sprintf(pathend, "%s/terminates", *name); + if (try_bad(logdepth, test_c, NULL, "")) { + put(path, strue); + report("terminates\n"); + } + else { + report("does not terminate\n"); + put(path, sfalse); + } + logdepth--; + } + +/* to avoid redetection */ + put("signal/names/presents", strue); + + return 0; +} + diff --git a/scconfig/src/default/find_str.c b/scconfig/src/default/find_str.c new file mode 100644 index 0000000..5e29531 --- /dev/null +++ b/scconfig/src/default/find_str.c @@ -0,0 +1,76 @@ +/* + scconfig - detection of standard library features: strings + Copyright (C) 2017 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +int find_strcasecmp(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "#include " + NL "int main() {" + NL " if ((strcasecmp(\"foo\", \"FoO\") == 0) && (strcasecmp(\"foo\", \"bar\") != 0))" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for strcasecmp()... "); + logprintf(logdepth, "find_fs_strcasecmp: trying to find strcasecmp...\n"); + logdepth++; + + if (try_icl(logdepth, "str/strcasecmp", test_c, NULL, NULL, NULL)) return 0; + return try_fail(logdepth, "str/strcasecmp"); +} + + +int find_strncasecmp(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "#include " + NL "int main() {" + NL " if ((strncasecmp(\"foo1\", \"FoO2\", 3) == 0) && (strncasecmp(\"foo1\", \"bar2\", 3) != 0))" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for strncasecmp()... 
"); + logprintf(logdepth, "find_fs_strncasecmp: trying to find strncasecmp...\n"); + logdepth++; + + if (try_icl(logdepth, "str/strncasecmp", test_c, NULL, NULL, NULL)) return 0; + return try_fail(logdepth, "str/strncasecmp"); +} + diff --git a/scconfig/src/default/find_sys.c b/scconfig/src/default/find_sys.c new file mode 100644 index 0000000..79803cf --- /dev/null +++ b/scconfig/src/default/find_sys.c @@ -0,0 +1,491 @@ +/* + scconfig - detect features of the system or the host/target computer + Copyright (C) 2009..2012 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +int find_sys_ptrwidth(const char *name, int logdepth, int fatal) +{ + char *end, W[32]; + char *out = NULL; + int w; + + char *test_c = + NL "#include " + NL "int main() {" + NL " void *ptr;" + NL " printf(\"%d\\n\", sizeof(ptr));" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for pointer width... 
"); + logprintf(logdepth, "find_sys_ptrwidth: trying to find pointer width...\n"); + logdepth++; + + if (compile_run(logdepth, test_c, NULL, NULL, NULL, &out) == 0) { + w = strtol(out, &end, 10); + if ((*end != '\0') && (*end != '\n') && (*end != '\r')) { + report("FAILED (test code failed)\n"); + logprintf(logdepth+1, "FAILED: returned '%s' which is not a valid decimal number (at '%s')\n", out, end); + return 1; + } + sprintf(W, "%d", w * 8); + report("OK (%s bits)\n", W); + put("sys/ptrwidth", W); + logprintf(logdepth+1, "OK (%s bits)\n", W); + } + return 0; +} + +int find_sys_byte_order(const char *name, int logdepth, int fatal) +{ + const char *test_c = + NL "#include " + NL "int main() {" + NL " long int i = 8;" + NL " char *s = (char *)&i;" + NL " printf(\"%d\\n\", s[0]);" + NL " return 0;" + NL "}" + NL; + const char* test_c_blind_template = + NL "#include " + NL "#include " + NL "#ifndef __BYTE_ORDER" + NL "#error \"ERROR 1\"" + NL "void void *;" + NL "#endif" + NL "#ifndef __%s_ENDIAN" + NL "#error \"ERROR 2\"" + NL "char char *;" + NL "#endif" + NL "#if __BYTE_ORDER != __%s_ENDIAN" + NL "#error \"ERROR 3\"" + NL "int int *;" + NL "#endif" + NL "int main() {" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + const char *key = "sys/byte_order"; + const char *endians1[] = { "LITTLE", "BIG", NULL }; + const char *endians2[] = { "LSB", "MSB", NULL }; + int index; + char test_c_blind[1024]; + char *end; + const char *W; + char *out = NULL; + int w; + + require("cc/cc", logdepth, fatal); + + report("Checking for byte order... 
"); + logprintf(logdepth, "find_sys_byte_order: trying to find byte order...\n"); + logdepth++; + + if ((!isblind(db_cwd)) && compile_run(logdepth, test_c, NULL, NULL, NULL, &out) == 0) { + w = strtol(out, &end, 10); + if (((*end != '\0') && (*end != '\n') && (*end != '\r')) || ((w != 0) && (w != 8))) { + report("FAILED (test code failed)\n"); + logprintf(logdepth+1, "FAILED: returned '%s' which is not a valid decimal number (at '%s')\n", out, end); + return 1; + } + if (w == 0) + W = "MSB"; + else + W = "LSB"; + + report("OK (%s first)\n", W); + put("sys/byte_order", W); + logprintf(logdepth+1, "OK (%s first)\n", W); + return 0; + } + + for (index=0; endians1[index]!=NULL; ++index) + { + sprintf(test_c_blind, test_c_blind_template, endians1[index], endians1[index]); + + if (compile_run(logdepth, test_c_blind, NULL, NULL, NULL, NULL) == 0) + { + W = endians2[index]; + report("OK (%s first)\n", W); + put(key, W); + return 0; + } + } + + report("FAILED (cannot determine byte order, you must supply it)\n"); + return try_fail(logdepth, key); +} + +static int test_shell_eats_backslash(int logdepth) +{ + char *test = "echo c:\\n"; + char *out; + + + logprintf(logdepth, "testing if shell eats \\...\n"); + run_shell(logdepth+1, test, &out); + if (out == NULL) { + logprintf(logdepth+1, "oops, couldn't run shell?! 
(returned NULL)\n"); + report("ERROR: shell fails."); + abort(); + } + if (out[2] == '\\') { + logprintf(logdepth, "shell does NOT eat \\...\n"); + put("sys/shell_eats_backslash", sfalse); + free(out); + return 0; + } + + free(out); + logprintf(logdepth, "shell eats \\...\n"); + put("sys/shell_eats_backslash", strue); + return 1; +} + + +static int try_get_cwd(int logdepth, const char *cmd) +{ + char *cwd, *end; + run_shell(logdepth+1, cmd, &cwd); + if (cwd != NULL) { + end = strpbrk(cwd, "\r\n"); + if (end != NULL) + *end = '\0'; + if (*cwd != '\0') { + end = cwd + strlen(cwd) - 1; + while((*end == ' ') || (*end == '\t')) { *end = '\0'; end--; } + put("sys/tmp", cwd); + /* ugly hack for win32: paths there start as c:\ */ + if ((cwd[1] == ':') && (cwd[2] == '\\')) + append("sys/tmp", "\\"); + else + append("sys/tmp", "/"); + logprintf(logdepth, "cwd is '%s'\n", get("sys/tmp")); + free(cwd); + return 1; + } + else + free(cwd); + } + return 0; +} + +static int try_tmp(int logdepth) +{ + char *fn; + + logprintf(logdepth, "validating temp dir '%s'\n", get("sys/tmp")); + fn = tempfile_new_noabort(""); + if (fn != NULL) { + unlink(fn); + free(fn); + logprintf(logdepth, "temp dir works!\n"); + return 1; + } + + logprintf(logdepth, "temp dir fails\n"); + return 0; +} + +/* test temp dir with all sort of workarounds */ +static int try_tmp_all(int logdepth) +{ + const char *tmp, *si; + char c; + char *t, *so, *old_tmp; + int eats, n; + + tmp = get("sys/tmp"); + + /* path must end in path separator */ + c = tmp[strlen(tmp)-1]; + if ((c != '/') && (c != '\\')) { + append("sys/tmp", "/"); + tmp = get("sys/tmp"); + } + + logprintf(logdepth, "trying detected temp dir '%s'\n", tmp); + if (try_tmp(logdepth+1)) return 1; + + /* try msys-on-windows hack: if path starts with /d/something, try d:/ instead */ + if ((tmp[0] == '/') && (isalpha(tmp[1])) && (tmp[2] == '/')) { + /* for the next test we shouldn't use our half-detected tmp path but go with . 
*/ + old_tmp = strclone(tmp); + put("sys/tmp", ""); + eats = istrue(get("sys/shell_eats_backslash")); + tmp = old_tmp; + logprintf(logdepth, "tmp2='%s' eats=%d\n", tmp, eats); + /* worst case every '/' is expanded to 4 backslashes (when the shell eats them), so reserve 4 bytes per input char plus the terminating NUL; the former strlen*2 overflowed on paths with short components such as /c/a/b/ */ + t = malloc(strlen(tmp) * 4 + 1); + t[0] = tmp[1]; + t[1] = ':'; + for(si = tmp + 2, so = t + 2; *si != '\0'; si++, so++) { + if (*si == '/') { + *so = '\\'; + if (eats) { + for(n = 0; n < 3; n++) { + so++; + *so = '\\'; + } + } + } + else + *so = *si; + } + *so = '\0'; + free(old_tmp); + + logprintf(logdepth, "trying windows fix: '%s'\n", t); + put("sys/tmp", t); + free(t); + if (try_tmp(logdepth+1)) { + if (eats) + put("sys/path_sep", "\\\\\\\\"); + else + put("sys/path_sep", "\\"); + return 1; + } + tmp = get("sys/tmp"); + } + + /* fail. Set back tmp to empty so next command has a chance to run */ + put("sys/tmp", ""); + return 0; +} + +/* Detect the temp dir and store it in sys/tmp; for a cross compilation target the host's sys/tmp is reused. Returns 0 on success, 1 if the host temp dir is not found. */ +int find_tmp(const char *name, int logdepth, int fatal) +{ + const char *usertmp; + + if (in_cross_target) { + report("Temp dir for cross compilation target is the same as for host..."); + logprintf(logdepth, "Copying temp dir from host to target\n"); + require("/host/sys/tmp", logdepth, fatal); + usertmp = get("/host/sys/tmp"); + if (usertmp == NULL) { + report("Host temp dir not found.\n"); + logprintf(logdepth, "Host temp dir not found.\n"); + return 1; + } + put("sys/tmp", usertmp); + return 0; + } + + /* we need shell for later tests; do this detection in . */ + put("sys/tmp", ""); + require("sys/shell", logdepth, fatal); + + put("sys/path_sep", "/"); + + report("Detecting temp dir..."); + logprintf(logdepth, "Finding temp dir (current working directory)...\n"); + + usertmp = get("/arg/sys/tmp"); + + /* . as tmp would fail for commands including a "cd" - this would cause + temporary files left in the target dir. We start out with empty + string (which is ., but on windows ./ would fail), and run + pwd (without cd) to find out the current directory (as getcwd() is not + portable). 
If pwd fails, we stay with ./ */ + put("sys/tmp", ""); + + /* we need to know about shell backslash problem regardless of how + we end up with tmp - currently tmp is ., where the test could run + safely */ + test_shell_eats_backslash(logdepth+1); + + /* Special case: cross-compilation with emulator; we can not assume + the emulator uses the same paths as the host system, while we mix + accessing files from host and emu. If we stay in ., both emulator + and host system should be (more or less) happy. */ + if (istarget(db_cwd) && iscross) { + if (usertmp == NULL) { + report("using temp dir . for cross-compilation\n"); + logprintf(logdepth, "staying with . for cross-compilation\n"); + } + else { + put("sys/tmp", usertmp); + report("using user supplied temp dir '%s' for cross-compilation\n", usertmp); + logprintf(logdepth, "using user supplied temp dir '%s' for cross-compilation\n", usertmp); + } + return 0; + } + + if ((usertmp != NULL)) + put("sys/tmp", usertmp); + + if ( + ((usertmp != NULL) && (try_tmp_all(logdepth+2))) || /* try user supplied temp dir */ + ((try_get_cwd(logdepth+1, "pwd")) && (try_tmp_all(logdepth+2))) || /* try pwd for finding out cwd */ + ((try_get_cwd(logdepth+1, "echo %cd%") && (try_tmp_all(logdepth+2))))) { /* try windows-specific way for finding out cwd */ + + report(" validated %s\n", get("sys/tmp")); + logprintf(logdepth, "Detected temp dir '%s'\n", get("sys/tmp")); + return 0; + } + + put("sys/tmp", ""); + report("using temp dir fallback .\n"); + logprintf(logdepth, "all temp directories failed, using . 
as tmp\n"); + return 0; +} + +int test_shell(const char *shell, int logdepth, int quote) +{ + char *test = "echo hello"; + char *cmd; + char *out; + char *q; + + if (quote) + q = "\""; + else + q = ""; + + logprintf(logdepth, "testing '%s' as shell\n", shell); + cmd = malloc(strlen(test) + strlen(shell) + 8); + sprintf(cmd, "%s %s%s%s", shell, q, test, q); + + run(logdepth+1, cmd, &out); + + free(cmd); + + if ((out != NULL) && (strncmp(out, "hello", 5) == 0)) { + put("sys/shell", shell); + if (quote) + put("sys/shell_needs_quote", strue); + else + put("sys/shell_needs_quote", sfalse); + logprintf(logdepth, "accepted.\n"); + free(out); + return 1; + } + + logprintf(logdepth, "refused.\n"); + free(out); + return 0; +} + +static int find_shell_escape(const char *name, int logdepth, int fatal, const char *shell) +{ + char cmdline[256]; + char **t; + char *tests[] = { + "\\", "\\ {}&;|", + "^", "^ &", + NULL, NULL + }; + (void) fatal; /* not used */ + (void) shell; /* not used */ + + report("Looking for a shell escape character... 
"); + logprintf(logdepth, "finding shell escape character...\n"); + + for(t = tests; *t != NULL; t += 2) { + char *s, *end, *out, *start; + strcpy(cmdline, "echo "); + end = cmdline+5; + for(s = t[1]; *s != '\0'; s++) { + *end++ = *t[0]; + *end++ = *s; + } + *end = '\0'; + run(logdepth+1, cmdline, &out); + if (out != NULL) { + int res; + if (*out == '\"') /* wine likes to wrap the output in quotes for some reason */ + start = out+1; + else + start = out; + + res = strncmp(start, t[1], strlen(t[1])); + free(out); + if (res == 0) { + report("found: '%s'\n", t[0]); + logprintf(logdepth, "found shell escape char '%s'\n", t[0]); + put("sys/shell_escape_char", t[0]); + return 0; + } + } + } + report("NOT FOUND\n"); + logprintf(logdepth, "shell escape character not found\n"); + + return 1; +} + +int find_shell(const char *name, int logdepth, int fatal) +{ + const char *shells[] = { + "/bin/sh -c", + "/bin/bash -c", + "bash -c", + "cmd.exe /c", + "sh -c", + "/bin/dash -c", + "dash -c", + "/bin/ksh -c", + "ksh -c", + NULL + }; + const char **s; + + if (cross_blind) { + const char *shell = get("/arg/sys/target-shell"); + if (shell == NULL) { + report("Need to specify sys/target-shell in blind cross compiling mode, because the shell cannot be detected (note: scconfig will not attempt to run the target shell)\n"); + exit(1); + } + + put("sys/shell", shell); + report("Blind cross compiling: accepting '%s' as shell\n", shell); + logprintf(logdepth, "Blind cross compiling: accepting '%s' as shell\n", shell); + return 0; + } + + report("Looking for a shell... 
"); + logprintf(logdepth, "finding a shell\n"); + + for(s = shells; *s != NULL; s++) { + if ((test_shell(*s, logdepth+1, 0)) || (test_shell(*s, logdepth+1, 1))) { + report("%s\n", *s); + logprintf(logdepth, "found a shell '%s', need quote: %s\n", *s, get("sys/shell_needs_quote")); + return find_shell_escape(name, logdepth, fatal, *s); + } + } + + report("NOT FOUND\n"); + logprintf(logdepth, "shell not found\n"); + return 1; +} diff --git a/scconfig/src/default/find_target.c b/scconfig/src/default/find_target.c new file mode 100644 index 0000000..2a6993d --- /dev/null +++ b/scconfig/src/default/find_target.c @@ -0,0 +1,58 @@ +/* + scconfig - glue layer for proper crosscompiling to target + Copyright (C) 2009 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include "db.h" +#include "libs.h" + +int find_target(const char *name, int logdepth, int fatal) +{ + const char *target = get("/arg/sys/target"); + const char *emu = get("/arg/sys/emu"); + + (void) logdepth; /* to suppress compiler warnings about not using logdepth */ + (void) fatal; /* to suppress compiler warnings about not using fatal */ + + /* Does target differ from host? 
*/ + if (target == NULL) { + db_link("/host", "/target"); +#ifdef RUNTIME + db_link("/host", "/runtime"); +#endif + put("/target/sys/cross", sfalse); + put("/target/sys/cross_blind", sfalse); + return 0; + } + else + db_mkdir("/target"); + + put("/target/sys/target", target); + put("/target/sys/cross", strue); + if (emu != NULL) + put("/target/sys/emu", emu); + + /* If so, check if emulator is provided */ + cross_blind = ((emu == NULL) || (*emu == '\0')); + put("/target/sys/cross_blind", cross_blind ? strue : sfalse); + + return 0; +} diff --git a/scconfig/src/default/find_thread.c b/scconfig/src/default/find_thread.c new file mode 100644 index 0000000..4ab793b --- /dev/null +++ b/scconfig/src/default/find_thread.c @@ -0,0 +1,223 @@ +/* + scconfig - detection of standard library features + Copyright (C) 2009,2017 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +int find_lib_lpthread(const char *name, int logdepth, int fatal) +{ + const char *lpthread; + char *s; + int ret = 0; + + char *test_c_recursive = + NL "#define _GNU_SOURCE 1 /* Needed for recursive thread-locking */" + NL "#include " + NL "pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;" + NL "int main() {" + NL " pthread_attr_t a;" + NL " if (pthread_attr_init(&a) == 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL ; + + char *test_c_simple = + NL "#include " + NL "int main() {" + NL " pthread_attr_t a;" + NL " if (pthread_attr_init(&a) == 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL ; + + require("cc/cc", logdepth, fatal); + + report("Checking for -lpthread... "); + logprintf(logdepth, "find_lib_lpthread: trying to find lpthread...\n"); + logdepth++; + + lpthread = get("/arg/libs/lpthread"); + + if (lpthread != NULL) { + put("libs/lpthread", lpthread); + report("User provided... 
"); + s = strclone(lpthread); + } + else + s = strclone("-lpthread"); + + if (try_icl(logdepth, NULL, test_c_recursive, NULL, NULL, s)) { + put("libs/lpthread", s); + put("libs/lpthread-recursive", strue); + report("OK, recursive (%s)\n", s); + } + else if (try_icl(logdepth, NULL, test_c_simple, NULL, NULL, s)) { + put("libs/lpthread", s); + put("libs/lpthread-recursive", sfalse); + report("OK, NOT RECURSIVE (%s)\n", s); + } + else + ret = 1; + + free(s); + return ret; +} + +int find_thread_semget(const char *name, int logdepth, int fatal) +{ + const char *test_c = + NL "#include " + NL "int main()" + NL "{" + NL " int semid = semget(IPC_PRIVATE, 1, IPC_CREAT);" + NL " if (semid < 0) return 0;" + NL " if(semctl(semid, 0, IPC_RMID) < 0) return 0;" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + const char *node = "thread/semget"; + char **inc, *incs[] = {"#include \n#include \n#include ", "#include \n#include ", "#include \n#include ", NULL}; + + if (require("cc/cc", logdepth, fatal)) + return try_fail(logdepth, node); + + report("Checking for semget... "); + logprintf(logdepth, "find_semget:\n"); + logdepth++; + + for(inc = incs; *inc != NULL; inc++) + if (try_icl(logdepth, node, test_c, *inc, NULL, NULL) != 0) + return 0; + + return try_fail(logdepth, node); +} + +int find_thread_pthread_create(const char *name, int logdepth, int fatal) +{ + const char *test_c = + NL "#include " + NL "void* test_thread(void* param)" + NL "{" + NL " return NULL;" + NL "}" + NL "int main()" + NL "{" + NL " pthread_t pt;" + NL " if (pthread_create(&pt, NULL, test_thread, NULL) == 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + const char *node = "thread/pthread_create"; + char **inc, *incs[] = {"#include ", "#include \n#include ", NULL}; + const char *lpthread; + char* s; + + if (require("cc/cc", logdepth, fatal)) + return try_fail(logdepth, node); + + report("Checking for pthread_create... 
"); + logprintf(logdepth, "find_pthread_create:\n"); + logdepth++; + + lpthread = get("/arg/libs/lpthread"); + + if (lpthread != NULL) { + report("User provided... "); + s = strclone(lpthread); + } + else + s = strclone("-lpthread"); + + for(inc = incs; *inc != NULL; inc++) + if (try_icl(logdepth, node, test_c, *inc, NULL, s) != 0) { + free(s); + return 0; + } + + free(s); + return try_fail(logdepth, node); +} + +int find_thread_CreateSemaphore(const char *name, int logdepth, int fatal) +{ + const char *test_c = + NL "#include " + NL "int main()" + NL "{" + NL " if (CreateSemaphore(NULL, 1, 1, NULL))" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + const char *node = "thread/CreateSemaphore"; + + if (require("cc/cc", logdepth, fatal)) + return try_fail(logdepth, node); + + report("Checking for CreateSemaphore... "); + logprintf(logdepth, "find_thread_CreateSemaphore:\n"); + logdepth++; + + if (try_icl(logdepth, node, test_c, "#include ", NULL, NULL) != 0) + return 0; + + return try_fail(logdepth, node); +} + +int find_thread_CreateThread(const char *name, int logdepth, int fatal) +{ + const char *test_c = + NL "#include " + NL "DWORD WINAPI test_thread(void* param)" + NL "{" + NL " return 0;" + NL "}" + NL "int main()" + NL "{" + NL " if (CreateThread(NULL, 0, test_thread, NULL, 0, NULL))" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + const char *node = "thread/CreateThread"; + + if (require("cc/cc", logdepth, fatal)) + return try_fail(logdepth, node); + + report("Checking for CreateThread... 
"); + logprintf(logdepth, "find_thread_CreateThread:\n"); + logdepth++; + + if (try_icl(logdepth, node, test_c, "#include ", NULL, NULL) != 0) + return 0; + + return try_fail(logdepth, node); +} diff --git a/scconfig/src/default/find_time.c b/scconfig/src/default/find_time.c new file mode 100644 index 0000000..e7189e3 --- /dev/null +++ b/scconfig/src/default/find_time.c @@ -0,0 +1,247 @@ +/* + scconfig - detection of standard library features: time/date/sleep related calls + Copyright (C) 2011..2012 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +int find_time_usleep(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "int main() {" + NL " if (usleep(1) == 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for usleep()... 
"); + logprintf(logdepth, "find_time_usleep: trying to find usleep...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/time/usleep", test_c, NULL, NULL, NULL)) + return 0; + return try_fail(logdepth, "libs/time/usleep"); +} + +int find_time_Sleep(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "int main() {" + NL " Sleep(1);" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for Sleep()... "); + logprintf(logdepth, "find_time_Sleep: trying to find Sleep...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/time/Sleep", test_c, "#include ", NULL, NULL)) + return 0; + return try_fail(logdepth, "libs/time/Sleep"); +} + +int find_time_gettimeofday(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "int main() {" + NL " struct timeval tv;" + NL " if (gettimeofday(&tv, NULL) == 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for gettimeofday()... "); + logprintf(logdepth, "find_time_gettimeofday: trying to find gettimeofday...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/time/gettimeofday", test_c, "#include ", NULL, NULL)) + return 0; + return try_fail(logdepth, "libs/time/gettimeofday"); +} + + +int find_time_ftime(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "int main() {" + NL " struct timeb tb;" + NL " if (ftime(&tb) == 0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for ftime()... 
"); + logprintf(logdepth, "find_time_ftime: trying to find ftime...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/time/ftime", test_c, "#include ", NULL, NULL)) + return 0; + return try_fail(logdepth, "libs/time/ftime"); +} + +static const char timegm_test_c_template[] = + NL "void my_puts(const char *s);" + NL "int main() {" + NL " struct tm tm;" + NL " tm.tm_sec = 50;" + NL " tm.tm_min = 30;" + NL " tm.tm_hour = 6;" + NL " tm.tm_mday = 1;" + NL " tm.tm_mon = 11;" + NL " tm.tm_year = 2018 - 1900;" + NL " tm.tm_wday = 0;" + NL " tm.tm_yday = 0;" + NL " if (%s(&tm) != (time_t)(-1))" + NL " my_puts(\"OK\");" + NL " return 0;" + NL "}" + NL "#include " + NL "void my_puts(const char *s)" + NL "{" + NL " puts(s);" + NL "}" + NL; + +int find_time_timegm(const char *name, int logdepth, int fatal) +{ + char test_c[1000]; + sprintf(test_c, timegm_test_c_template, "timegm"); + + require("cc/cc", logdepth, fatal); + + report("Checking for timegm()... "); + logprintf(logdepth, "find_time_timegm: trying to find timegm...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/time/timegm", test_c, "#include ", NULL, NULL)) + return 0; + return try_fail(logdepth, "libs/time/timegm"); +} + +int find_time_mkgmtime(const char *name, int logdepth, int fatal) +{ + char test_c[1000]; + const char *ldflags[] = {"","-lmsvcr120","-lmsvcr110","-lmsvcr100","-lmsvcr90","-lmsvcr80","-lmsvcr71","-lmsvcr70",NULL}; + const char **ldf; + + sprintf(test_c, timegm_test_c_template, "_mkgmtime"); + + require("cc/cc", logdepth, fatal); + + report("Checking for _mkgmtime()... 
"); + logprintf(logdepth, "find_time_mkgmtime: trying to find _mkgmtime...\n"); + logdepth++; + + for (ldf = ldflags; *ldf; ++ldf) + if (try_icl(logdepth, "libs/time/_mkgmtime", test_c, "#include ", NULL, *ldf)) + return 0; + return try_fail(logdepth, "libs/time/_mkgmtime"); +} + +int find_time_gmtime_r(const char *name, int logdepth, int fatal) +{ + const char test_c[] = + NL "void my_puts(const char *s);" + NL "int main() {" + NL " time_t tim = 1543645850;" + NL " struct tm tm;" + NL " if (gmtime_r(&tim, &tm)" /* returns '&tm' */ + NL " && 50==tm.tm_sec" + NL " && 30==tm.tm_min" + NL " && 6==tm.tm_hour" + NL " && 1==tm.tm_mday" + NL " && 11==tm.tm_mon" + NL " && (2018-1900)==tm.tm_year)" + NL " my_puts(\"OK\");" + NL " return 0;" + NL "}" + NL "#include " + NL "void my_puts(const char *s)" + NL "{" + NL " puts(s);" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for gmtime_r()... "); + logprintf(logdepth, "find_time_gmtime_r: trying to find gmtime_r...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/time/gmtime_r", test_c, "#include ", NULL, NULL)) + return 0; + return try_fail(logdepth, "libs/time/gmtime_r"); +} + +int find_time_gmtime_s(const char *name, int logdepth, int fatal) +{ + const char test_c[] = + NL "void my_puts(const char *s);" + NL "int main() {" + NL " time_t tim = 1543645850;" + NL " struct tm tm;" + NL " if (0==gmtime_s(&tm, &tim)" /* returns errno */ + NL " && 50==tm.tm_sec" + NL " && 30==tm.tm_min" + NL " && 6==tm.tm_hour" + NL " && 1==tm.tm_mday" + NL " && 11==tm.tm_mon" + NL " && (2018-1900)==tm.tm_year)" + NL " my_puts(\"OK\");" + NL " return 0;" + NL "}" + NL "#include " + NL "void my_puts(const char *s)" + NL "{" + NL " puts(s);" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for gmtime_s()... 
"); + logprintf(logdepth, "find_time_gmtime_s: trying to find gmtime_s...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/time/gmtime_s", test_c, "#include ", NULL, NULL)) + return 0; + return try_fail(logdepth, "libs/time/gmtime_s"); +} diff --git a/scconfig/src/default/find_types.c b/scconfig/src/default/find_types.c new file mode 100644 index 0000000..05c0d84 --- /dev/null +++ b/scconfig/src/default/find_types.c @@ -0,0 +1,404 @@ +/* + scconfig - detection of types and type sizes + Copyright (C) 2012 Tibor Palinkas + Copyright (C) 2017-2018 Aron Barath + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +/* assume there is no integer that is at least this wide, in bytes */ +#define MAX_INT_WIDTH 9 +/* ('long double' can be 16 bytes; see https://en.wikipedia.org/wiki/Long_double) */ +#define MAX_FLT_WIDTH 17 + +/* Compile and run a tiny program that prints sizeof(type). On success the size is stored as text under path, type is recorded in sizearr[size] and, when inc_stdint is not NULL, use_stdint is recorded in inc_stdint[size]. Returns the size in bytes, or -1 when the test fails or the size is not in 1..max_width-1. */ +static int try_size(int logdepth, char *cflags, char *ldflags, const char *type, int use_stdint, const char *path, const char **sizearr, unsigned char *inc_stdint, const int max_width) +{ + char *out = NULL; + const char *test_c_template = + NL "#include " + NL "int main() {" + NL " printf(\"OK %%d\\n\", sizeof(%s));" + NL " return 0;" + NL "}" + NL; + char test_c[512], *start; + const char *inc = "#include \n"; + int size; + + if (use_stdint) { + strcpy(test_c, inc); + start = test_c + strlen(inc); + } + else + start = test_c; + sprintf(start, test_c_template, type); + + report("Testing size of type %25s... ", type); + + logprintf(logdepth, "trying size with ldflags '%s'\n", ldflags == NULL ? get("cc/ldflags") : ldflags); + if (compile_run(logdepth+1, test_c, NULL, cflags, ldflags, &out) == 0) { + if (target_emu_fail(out)) { + report(" FAIL (emulator)\n"); + free(out); + return -1; + } + + if (strncmp(out, "OK", 2) == 0) { + size = atoi(out+3); + if ((size > 0) && (size < max_width)) { + sprintf(test_c, "%d", size); + put(path, test_c); + sizearr[size] = type; + if (inc_stdint != NULL) + inc_stdint[size] = use_stdint; + report(" OK, size %d byte%s\n", size, (size > 1) ?
"s" : ""); + } + else { + report(" FAIL, size %d bytes\n", size); + size = -1; + } + free(out); + return size; + } + free(out); + } + report(" FAIL (compile)\n"); + return -1; +} + +int find_types_stdint(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "#include " + NL "int main() {" + NL " if (sizeof(uint8_t) == 1)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for stdint.h... "); + logprintf(logdepth, "find_types_stdint: trying to find stdint.h...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/types/stdint", test_c, NULL, NULL, NULL)) + return 0; + return try_fail(logdepth, "libs/types/stdint"); +} + +int find_types_sizes(const char *name, int logdepth, int fatal) +{ + const char *stdint; + const char *sizearr_u[MAX_INT_WIDTH]; + const char *sizearr_s[MAX_INT_WIDTH]; + const char *sizearr_f[MAX_FLT_WIDTH]; + unsigned char inc_stdint_u[MAX_INT_WIDTH]; + unsigned char inc_stdint_s[MAX_INT_WIDTH]; + int n; + const char *includes = ""; + const char *path_template = "sys/types/size/%d_%c_int"; + const char *path_template_f = "sys/types/size/%d_float"; + char path[64]; + + require("cc/cc", logdepth, fatal); + require("libs/types/stdint/presents", logdepth, 0); + stdint = get("libs/types/stdint/presents"); + + for(n = 0; n < MAX_INT_WIDTH; n++) { + sizearr_u[n] = NULL; + sizearr_s[n] = NULL; + inc_stdint_u[n] = 0; + inc_stdint_s[n] = 0; + } + for(n = 0; n < MAX_FLT_WIDTH; n++) + sizearr_f[n] = NULL; + + try_size(logdepth+1, NULL, NULL, "unsigned long long int", 0, "sys/types/size/unsigned_long_long_int", sizearr_u, NULL, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "unsigned char", 0, "sys/types/size/unsigned_char", sizearr_u, NULL, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "unsigned short int", 0, "sys/types/size/unsigned_short_int", sizearr_u, NULL, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "unsigned int", 0, "sys/types/size/unsigned_int", sizearr_u,
NULL, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "unsigned long int", 0, "sys/types/size/unsigned_long_int", sizearr_u, NULL, MAX_INT_WIDTH); + + try_size(logdepth+1, NULL, NULL, "signed long long int", 0, "sys/types/size/signed_long_long_int", sizearr_s, NULL, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "signed char", 0, "sys/types/size/signed_char", sizearr_s, NULL, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "signed short int", 0, "sys/types/size/signed_short_int", sizearr_s, NULL, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "signed int", 0, "sys/types/size/signed_int", sizearr_s, NULL, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "signed long int", 0, "sys/types/size/signed_long_int", sizearr_s, NULL, MAX_INT_WIDTH); + + if ((stdint != NULL) && (istrue(stdint))) { + try_size(logdepth+1, NULL, NULL, "uint8_t", 1, "sys/types/size/uint8_t", sizearr_u, inc_stdint_u, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "uint16_t", 1, "sys/types/size/uint16_t", sizearr_u, inc_stdint_u, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "uint32_t", 1, "sys/types/size/uint32_t", sizearr_u, inc_stdint_u, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "uint64_t", 1, "sys/types/size/uint64_t", sizearr_u, inc_stdint_u, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "int8_t", 1, "sys/types/size/int8_t", sizearr_s, inc_stdint_s, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "int16_t", 1, "sys/types/size/int16_t", sizearr_s, inc_stdint_s, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "int32_t", 1, "sys/types/size/int32_t", sizearr_s, inc_stdint_s, MAX_INT_WIDTH); + try_size(logdepth+1, NULL, NULL, "int64_t", 1, "sys/types/size/int64_t", sizearr_s, inc_stdint_s, MAX_INT_WIDTH); + } + + try_size(logdepth+1, NULL, NULL, "float", 0, "sys/types/size/float", sizearr_f, NULL, MAX_FLT_WIDTH); + try_size(logdepth+1, NULL, NULL, "double", 0, "sys/types/size/double", sizearr_f, NULL, MAX_FLT_WIDTH); + try_size(logdepth+1, NULL, NULL,
"long double", 0, "sys/types/size/long_double", sizearr_f, NULL, MAX_FLT_WIDTH); + + for(n = 0; n < MAX_INT_WIDTH; n++) { + if (sizearr_u[n] != NULL) { + report("Found best fit %d bytes wide uint: %s\n", n, sizearr_u[n]); + sprintf(path, path_template, n, 'u'); + put(path, sizearr_u[n]); + if (inc_stdint_u[n]) + includes = "#include "; + } + if (sizearr_s[n] != NULL) { + report("Found best fit %d bytes wide sint: %s\n", n, sizearr_s[n]); + sprintf(path, path_template, n, 's'); + put(path, sizearr_s[n]); + if (inc_stdint_s[n]) + includes = "#include "; + } + } + for(n = 0; n < MAX_FLT_WIDTH; n++) { + if (sizearr_f[n] != NULL) { + report("Found best fit %d bytes wide float: %s\n", n, sizearr_f[n]); + sprintf(path, path_template_f, n); + put(path, sizearr_f[n]); + } + } + + put("sys/types/size/presents", strue); /* to avoid redetection */ + put("sys/types/size/includes", includes); + + return 0; +} + + +/* Detect the type typ: try each candidate include line (try_include first when it is non-empty) until a test program declaring a variable of that type compiles and prints OK, then store the includes, presents, broken and size nodes under prefix/typ/. Returns 0 on success, non-zero when the type is not found or the follow-up test fails. */ +int find_types_something_t(const char *name, int logdepth, int fatal, const char *prefix, const char *typ, const char *define, const char *try_include) +{ + char *out = NULL; + int res = 1; /* failure by default, so a "broken" test that does not even compile is never read as an uninitialized result */ + char test_c[512]; + char node[256], *nodeend; + const char **include, *includes[] = {"", "#include ", "#include ", "#include ", "#include ", "#include ", "#include ", NULL}; + const char ** const first_include = (try_include && *try_include) ?
includes : (includes+1); + + char *test_c_include = + NL "%s" + NL "int my_puts(const char *s);" + NL "%s" + NL "int main() {" + NL " %s s;" + NL " my_puts(\"OK\");" + NL " return 0;" + NL "}" + NL "#include " + NL "int my_puts(const char *s)" + NL "{" + NL " return puts(s);" + NL "}" + NL; + + char *test_c_size = + NL "%s" + NL "#include " + NL "%s" + NL "int main() {" + NL " printf(\"%%d\", sizeof(%s));" + NL " return 0;" + NL "}" + NL; + + char *test_c_broken = + NL "%s" + NL "int my_puts(const char *s);" + NL "%s" + NL "int main() {" + NL " %s s;" + NL " void *v;" + NL " if (sizeof(v) != sizeof(s)) my_puts(\"yes\");" + NL " else my_puts(\"no\");" + NL " return 0;" + NL "}" + NL "#include " + NL "int my_puts(const char *s)" + NL "{" + NL " return puts(s);" + NL "}" + NL; + + includes[0] = try_include; + + require("cc/cc", logdepth, fatal); + + report("Checking for type %s... ", typ); + logprintf(logdepth, "find_types_something_t: Checking for %s...\n", typ); + logdepth++; + + sprintf(node, "%s/%s", prefix, typ); + nodeend = node + strlen(node); + + /* replace '*' at the end of the node path with _ptr so it can be saved in the tree */ + if (nodeend[-1] == '*') { + nodeend--; + while((nodeend > node) && (*nodeend == ' ')) nodeend--; + strcpy(nodeend-1, "_ptr"); + nodeend+=4; + } + + nodeend[0] = '/'; + nodeend[1] = '\0'; + nodeend++; + + if (define == NULL) + define = ""; + + for(include = first_include; *include != NULL; include++) { + sprintf(test_c, test_c_include, define, *include, typ); + /* a successful run may still have no captured output, so check out before comparing */ + if ((compile_run(logdepth, test_c, NULL, NULL, NULL, &out) == 0) && (out != NULL) && (strncmp(out, "OK", 2) == 0)) { + report("Found; "); + logprintf(logdepth+1, "include %s works\n", *include); + sprintf(nodeend, "includes"); + if (define) { + put(node, define); + append(node, "\\n"); + append(node, *include); + } else + put(node, *include); + free(out); /* release the output of the winning test before out is reused below */ + out = NULL; + break; + } + logprintf(logdepth+1, "include %s fails\n", *include); + free(out); /* free(NULL) is a no-op */ + out = NULL; /* never leave a dangling pointer for the next iteration */ + } + if (*include == NULL) { + report("Not
found\n"); + return 1; + } + + sprintf(nodeend, "presents"); + put(node, strue); + + /* check if typ is broken (smaller than void *) */ + sprintf(test_c, test_c_broken, define, *include, typ); + if (compile_run(logdepth, test_c, NULL, NULL, NULL, &out) == 0) { + if ((out != NULL) && (strncmp(out, "yes", 3) == 0)) { + report("(%s is narrower than void *)\n", typ); + sprintf(nodeend, "broken"); + put(node, strue); + res = 0; + } + else if ((out != NULL) && (strncmp(out, "no", 2) == 0)) { + report("(%s is not narrower than void *)\n", typ); + sprintf(nodeend, "broken"); + put(node, sfalse); + res = 0; + } + else { + report("ERROR: test failed (%s)\n", (out == NULL) ? "" : out); + res = 1; + } + } + else + report("ERROR: test failed (compile)\n"); /* res keeps its failure default */ + if (out != NULL) + free(out); + out = NULL; /* avoid a second free if the size test below does not produce output */ + + if (res == 0) { + report("Checking for size of %s... ", typ); + sprintf(test_c, test_c_size, define, *include, typ); + if (compile_run(logdepth, test_c, NULL, NULL, NULL, &out) == 0) { + if (out != NULL) { + report("(sizeof %s is %s)\n", typ, out); + sprintf(nodeend, "size"); + put(node, out); + } + } + if (out != NULL) + free(out); + } + + return res; +} + + +int find_types_size_t(const char *name, int logdepth, int fatal) +{ + return find_types_something_t(name, logdepth, fatal, "sys/types", "size_t", NULL, NULL); +} + +int find_types_off_t(const char *name, int logdepth, int fatal) +{ + return find_types_something_t(name, logdepth, fatal, "sys/types", "off_t", NULL, NULL); +} + +int find_types_off64_t(const char *name, int logdepth, int fatal) +{ + return find_types_something_t(name, logdepth, fatal, "sys/types", "off64_t", NULL, NULL) && + find_types_something_t(name, logdepth, fatal, "sys/types", "off64_t", "#define _LARGEFILE64_SOURCE", NULL); +} + +int find_types_gid_t(const char *name, int logdepth, int fatal) +{ + return find_types_something_t(name, logdepth, fatal, "sys/types", "gid_t", NULL, NULL); +} + +int find_types_uid_t(const char *name, int logdepth, int fatal) +{ + return find_types_something_t(name, logdepth, fatal, "sys/types", "uid_t",
NULL, NULL); +} + +int find_types_pid_t(const char *name, int logdepth, int fatal) +{ + return find_types_something_t(name, logdepth, fatal, "sys/types", "pid_t", NULL, NULL); +} + +int find_types_mode_t(const char *name, int logdepth, int fatal) +{ + return find_types_something_t(name, logdepth, fatal, "sys/types", "mode_t", NULL, NULL); +} + +int find_types_nlink_t(const char *name, int logdepth, int fatal) +{ + return find_types_something_t(name, logdepth, fatal, "sys/types", "nlink_t", NULL, NULL); +} + +int find_types_ptrdiff_t(const char *name, int logdepth, int fatal) +{ + return find_types_something_t(name, logdepth, fatal, "sys/types", "ptrdiff_t", NULL, NULL); +} + +int find_types_dev_t(const char *name, int logdepth, int fatal) +{ + return find_types_something_t(name, logdepth, fatal, "sys/types", "dev_t", NULL, NULL); +} + +int find_types_ino_t(const char *name, int logdepth, int fatal) +{ + return find_types_something_t(name, logdepth, fatal, "sys/types", "ino_t", NULL, NULL); +} + +int find_types_void_ptr(const char *name, int logdepth, int fatal) +{ + return find_types_something_t(name, logdepth, fatal, "sys/types", "void *", NULL, NULL); +} diff --git a/scconfig/src/default/find_uname.c b/scconfig/src/default/find_uname.c new file mode 100644 index 0000000..6bf1d82 --- /dev/null +++ b/scconfig/src/default/find_uname.c @@ -0,0 +1,355 @@ +/* + scconfig - evaluate uname and classify the system + Copyright (C) 2009 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "regex.h" +#include "log.h" +#include "db.h" +#include "libs.h" +#include "dep.h" + +/* Store the file name extensions used on UNIX-like systems: no executable suffix, .so dynamic libraries, .a static libraries. */ static void sys_unix(void) +{ + put("sys/ext_exe", ""); + put("sys/ext_dynlib", ".so"); + put("sys/ext_stalib", ".a"); + put("sys/ext_dynlib_native", ".so"); +} + +/* NetBSD: the UNIX defaults plus linker flags pointing at /usr/pkg/lib. */ static void sys_netbsd(void) +{ + sys_unix(); + put("cc/ldflags", "-Wl,-R/usr/pkg/lib -L/usr/pkg/lib"); /* TODO: is this the best way? */ +} + +/* Store the file name extensions used for WIN32-class systems (.exe, .dlc, .a, native .dll). */ static void sys_win32dlc(void) +{ + put("sys/ext_exe", ".exe"); + put("sys/ext_dynlib", ".dlc"); + put("sys/ext_stalib", ".a"); + put("sys/ext_dynlib_native", ".dll"); +} + +/* System-specific setup routine; takes no arguments and returns nothing. */ typedef void (*callback_t)(void); + +/* One uname classification rule: a regex to match against the uname string, the system name and class it maps to, and the setup callback for that system. */ typedef struct { + char *uname_regex; + char *name; + char *class; + + callback_t callback; +} uname_t; + +/* One "magic file" rule: a file whose existence suggests a system, with the name, class and setup callback to use when it is found. */ typedef struct { + char *file_name; + char *name; + char *class; + + callback_t callback; +} magic_file_t; + +/* Guess system class by uname; class is informative, nothing important + should depend on it.
+ Order *does* matter */ +uname_t unames[] = { + {"[Nn]et[Bb][Ss][Dd]", "NetBSD", "UNIX", sys_netbsd}, + {"[Ll]inux", "Linux", "UNIX", sys_unix}, + {"[Bb][Ss][Dd]", "BSD", "UNIX", sys_unix}, + {"SunOS", "SunOS", "UNIX", sys_unix}, + {"OSF1", "OSF", "UNIX", sys_unix}, /* TODO: note the difference in cflags for debugging ("-ms -g") */ + {"IRIX", "IRIX", "UNIX", sys_unix}, + {"SunOS", "SunOS", "UNIX", sys_unix}, + {"[Mm]inix", "Minix", "UNIX", sys_unix}, + {"[Aa][Rr][Oo][Ss]", "Aros", "UNIX", sys_unix}, + {"^Darwin", "MacOSX", "UNIX", sys_unix}, + {"[Tt]hreos", "Threos", "UNIX", sys_unix}, /* was "[Th]hreos", which could not match a lowercase "threos" */ + {"[Cc]ygwin", "cygwin", "WIN32", sys_win32dlc}, + {"[Mm][Ii][Nn][Gg][Ww]", "mingw", "WIN32", sys_win32dlc}, + {"win32", "win32", "WIN32", sys_win32dlc}, /* vanilla windows */ + {NULL, NULL, NULL, NULL} +}; + +/* Fallback: extract machine name from uname -a if uname -m fails */ +static const char *machine_names[] = { + "i[0-9]86[^ ]*", "x86_[^ ]*", "amd[0-9]*", "armv[0-9][^ ]*", "ppc[0-9]+", + "sparc[0-9]*", "BePC", "ia64", "x86", "IP[0-9]*", "k1om", "sun4u", + "RM600", "R4000", "alpha", + NULL +}; + +/* Fallback: extract system name from uname -a if uname -s fails */ +static const char *system_names[] = { + "[Ll]inux", "sn5[0-9]*", "CYGWIN_NT[^ ]*", "GNU[^ ]*", "DragonFly", + "[^ ]*BSD[^ ]*", "Haiku", "HP-UX", "AIX", "OS4000", "Interix", + "IRIX[0-9]*", "Darwin", "Minix", "MINGW[^ ]*", "ReliantUNIX[^ ]*", + "SunOS", "OSF1", "ULTRIX", "UWIN-W7", "IS/WB", "OS/390", + "SCO[^ ]*", "QNX", + NULL +}; + +/* Fallback: if uname -a fails, guess system by looking at "magic file names" */ +magic_file_t magic_files[] = { + {"/dev/null", "UNIX", "UNIX", sys_unix}, + {"c:\\config.sys", "win32", "WIN32", sys_win32dlc}, + {"c:\\windows\\system.ini", "win32", "WIN32", sys_win32dlc}, + {"c:\\windows\\win.ini", "win32", "WIN32", sys_win32dlc}, + {"c:\\windows\\notepad.exe", "win32", "WIN32", sys_win32dlc}, + {NULL, NULL, NULL, NULL} +} ; + +/* Compile regex and run it against str; returns the result of re_exec() */ +static int match(const char *regex, const char *str) +{ +
re_comp(regex); + return re_exec(str); +} + +/* match uname against each pattern on the list; on a match, put() the portion + of the string matched in node and return 1 */ +int uname_guess(const char *node, const char *uname, const char *list[]) +{ + const char **l; + if (uname == NULL) + return 0; + for(l = list; *l != NULL; l++) { + if (match(*l, uname)) { + char *s; + int len = eopat[0] - bopat[0]; + s = malloc(len+1); + if (s == NULL) + return 0; /* out of memory: report "no guess" instead of writing through NULL */ + memcpy(s, bopat[0], len); + s[len] = '\0'; + put(node, s); /* NOTE(review): s is never freed; whether put() keeps the pointer or copies the string is not visible here - confirm before adding free(s) */ + return 1; + } + } + return 0; +} + +/* Don't worry about linear search or matching regexes all the time - this + function will be run at most two times */ +/* Returns the callback of the first unames[] row that matches by uname regex, by name or by class, and fills in *name and *class when they are still NULL; returns NULL when no row matches */ +static callback_t lookup_uname(char **uname, const char **name, const char **class) +{ + uname_t *u; + for(u = unames; u->uname_regex != NULL; u++) { + if ( + ((*uname != NULL) && (match(u->uname_regex, *uname))) /* uname match */ + || ((*name != NULL) && ((strcmp(u->name, *name) == 0))) /* name match */ + || ((*class != NULL) && ((strcmp(u->class, *class) == 0))) /* class match */ + ) { + if (*name == NULL) *name = u->name; + if (*class == NULL) *class = u->class; + return u->callback; + } + } + return NULL; +} + +/* Returns the callback of the first magic_files[] row whose file exists, filling in *name and *class when they are still NULL; returns NULL when none of the files exist */ +static callback_t lookup_magic_file(int logdepth, const char **name, const char **class) +{ + magic_file_t *u; + for(u = magic_files; u->file_name != NULL; u++) { + if (is_file(u->file_name)) { + logprintf(logdepth, "%s -> %s\n", u->file_name, u->class); + + if (*name == NULL) *name = u->name; + if (*class == NULL) *class = u->class; + return u->callback; + } + } + return NULL; +} + +/* Classify the system: take sys/name and sys/uname from the database or from the uname command, look the result up in unames[] (falling back to magic files), run the matching setup callback and store sys/name, sys/class and related nodes. Aborts when the system cannot be classified; otherwise returns 0. */ +int find_uname(const char *rname, int logdepth, int fatal) +{ + const char *name, *class, *tname, *uname_orig; + char *s, *uname = NULL, *mname = NULL, *sname = NULL; /* start from NULL so no path can read an indeterminate pointer */ + void (*callback)(void); + + require("sys/tmp", logdepth, fatal); + + if (istarget(db_cwd)) + require("/target/sys/target", logdepth, fatal); + + report("Checking for system type...
"); + logprintf(logdepth, "[find_uname] checking for sys/name\n"); + logdepth++; + + tname = get("/arg/sys/target-name"); + if (istarget(db_cwd) && (tname != NULL)) + put("sys/name", tname); + + tname = get("/arg/sys/target-uname"); + if (istarget(db_cwd) && (tname != NULL)) + put("sys/uname", tname); + + name = get("sys/name"); + uname_orig = get("sys/uname"); + + if (name == NULL) { + if (uname_orig == NULL) { + logprintf(logdepth, "not set, running\n"); + run_shell(logdepth, "uname -a", (char **)&uname); + if (uname != NULL) { + for(s = uname; *s != '\0'; s++) + if ((*s == '\n') || (*s == '\r')) *s = ' '; + put("sys/uname", uname); + } + else + put("sys/uname", ""); + + if (run_shell(logdepth, "uname -m", (char **)&mname) == 0) + put("sys/machine_name", strip(mname)); + else + put("sys/machine_name", NULL); + + if (mname != NULL) + free(mname); + + if (run_shell(logdepth, "uname -o", (char **)&sname) == 0) + put("sys/system_name", strip(sname)); + else if (run_shell(logdepth, "uname -s", (char **)&sname) == 0) + put("sys/system_name", strip(sname)); + else + put("sys/system_name", NULL); + if (sname != NULL) + free(sname); + } + else + uname = (char *)uname_orig; /* sys/uname was preset: classify by it; it is only read below, never modified or freed */ + + /* we have uname by now, set sys/name */ + name = NULL; + class = NULL; + callback = lookup_uname(&uname, &name, &class); + if (name == NULL) { + /* no uname or unknown system by uname - fallback: check for cross target */ + const char *target = get("/arg/sys/target"); + if ((target != NULL) && (strstr(target, "mingw") != NULL)) { + name = "WIN32"; + report("(detected mingw cross compilation to WIN32)\n"); + } + else { + report("Warning: unknown system\n"); + name = "unknown"; + } + } + put("sys/name", name); + } + else { + /* we had sys/name, that should be enough */ + uname = NULL; + class = name; + callback = lookup_uname(&uname, &name, &class); + } + + /* predefined and/or detected uname failed, try magic file method */ + if (callback == NULL) { + logprintf(logdepth, "System class is unknown by uname, running heuristics...\n"); +
report("System class is unknown by uname, running heuristics... "); + + callback = lookup_magic_file(logdepth + 1, &name, &class); + } + + + if (callback == NULL) { + /* System unknown. */ + error("Unknown system '%s'\n", get("sys/uname")); + abort(); + } + + callback(); + report("OK (name: %s; class: %s)\n", name, class); + put("sys/class", class); + + /* fallbacks */ + if (get("sys/machine_name") == NULL) + uname_guess("sys/machine_name", uname, machine_names); + + if (get("sys/system_name") == NULL) + uname_guess("sys/system_name", uname, system_names); + + /* on windows, overwrite the path sep with the right amount of \ (the tmp finder may have left / in it) */ + if (strcmp(class, "WIN32") == 0) { + int eats = istrue(get("sys/shell_eats_backslash")); + + if (eats) + put("sys/path_sep", "\\\\\\\\"); + else + put("sys/path_sep", "\\"); + } + + return 0; +} + +/* Build a lowercase machine[-vendor]-os identifier from sys/machine_name and sys/system_name ("unknown" for missing parts; vendor is always "unknown") and store it in nodename. Fields are joined with *sep; other non-alphanumeric characters become *esc, or '-' when esc is NULL. Always returns 0. */ +static int find_triplet_(const char *name, int logdepth, int fatal, const char *nodename, int include_vendor, char *sep, char *esc) +{ + const char *machine, *vendor, *os; + char *triplet, *s; + char fake_sep[2]; + + fake_sep[0] = 1; + fake_sep[1] = 0; + + require("sys/uname", logdepth, fatal); + + machine = get("sys/machine_name"); + if (machine == NULL) + machine = "unknown"; + + vendor = "unknown"; + + os = get("sys/system_name"); + if (os == NULL) + os = "unknown"; + + if (include_vendor) + triplet = str_concat(fake_sep, machine, vendor, os, NULL); + else + triplet = str_concat(fake_sep, machine, os, NULL); + + for(s = triplet; *s != '\0'; s++) { + if ((esc != NULL) && (*s == *sep)) + *s = *esc; + /* the ctype functions need an unsigned char value; a negative plain char is undefined behavior */ + if (isalnum((unsigned char)*s)) + *s = tolower((unsigned char)*s); + else { + if (*s == *fake_sep) + *s = *sep; + else if (esc != NULL) + *s = *esc; + else + *s = '-'; + } + } + put(nodename, triplet); + free(triplet); + return 0; +} + +int find_triplet(const char *name, int logdepth, int fatal) +{ + return find_triplet_(name, logdepth, fatal, "sys/triplet", 1, "-", NULL); +} + +int find_sysid(const char *name, int logdepth, int fatal) +{ +
return find_triplet_(name, logdepth, fatal, "sys/sysid", 0, "-", "_"); +} diff --git a/scconfig/src/default/hooks.h b/scconfig/src/default/hooks.h new file mode 100644 index 0000000..96d826d --- /dev/null +++ b/scconfig/src/default/hooks.h @@ -0,0 +1,33 @@ +/* Runs when a custom command line argument is found + returns true if no further argument processing should be done */ +int hook_custom_arg(const char *key, const char *value); + +/* If any of the int hooks return non-zero, that means failure and stops + the whole process */ + +/* Runs before anything else */ +int hook_preinit(void); + +/* Runs after initialization */ +int hook_postinit(void); + +/* Runs after all arguments are read and parsed */ +int hook_postarg(void); + +/* Runs when things should be detected for the host system (tools compiled for and/or run on compilation host) */ +int hook_detect_host(void); + +/* Runs when things should be detected for the target system (tools compiled on the compilation host but running on the target)*/ +int hook_detect_target(void); + +/* Runs when things should be detected for the runtime system (tools that will run only on the target, production runtime, not during compilation or installation) */ +int hook_detect_runtime(void); + +/* Runs after detection hooks, should generate the output (Makefiles, etc.)
*/ +int hook_generate(void); + +/* Runs before everything is uninitialized */ +void hook_preuninit(void); + +/* Runs at the very end, when everything is already uninitialized */ +void hook_postuninit(void); diff --git a/scconfig/src/default/ht.c b/scconfig/src/default/ht.c new file mode 100644 index 0000000..b7955cf --- /dev/null +++ b/scconfig/src/default/ht.c @@ -0,0 +1,257 @@ +/* + scconfig - hash tables + Copyright (C) 2007, 2008, 2009 by Szabolcs Nagy + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include +#include +#include "libs.h" +#include "ht.h" + +#define HT_MINSIZE 8 +#define HT_MAXSIZE ((UINT_MAX >> 1U) + 1U) + +/* probe sequence: the step grows by one on every jump (offsets 1, 3, 6, ... from the start slot) */ +#define JUMP(i, j) i += j++ +#define JUMP_FIRST(i, j) j = 1, i += j++ + +/* portable str hash */ +#define HASH_INIT 0xbabeface +/* each step computes a = a*33 + next character, starting from HASH_INIT */ static unsigned int keyhash(const char *key) { + unsigned int a = HASH_INIT; + + while (*key) + a += (a << 5) + *key++; + return a; +} + +/* fill threshold = 3/4 */ +#define HT_LOG_THRES 2 +/* resize when more than 3/4 of the slots are filled or fill exceeds four times the used count; the new size hint is used*4, or used*2 once used is above 2^15 */ static void checkfill(ht_t *ht) +{ + if (ht->fill > ht->mask - (ht->mask >> HT_LOG_THRES) || ht->fill > ht->used << 2) + ht_resize(ht, ht->used << (ht->used > 1U << 15 ?
1 : 2)); +} + +/* reset ht to an empty table of HT_MINSIZE zeroed slots with refcount 1; the calloc() result is not checked here */ static ht_t *ht_init(ht_t *ht, int isstr) +{ + ht->mask = HT_MINSIZE - 1; + ht->fill = 0; + ht->used = 0; + ht->isstr = isstr; + ht->table = calloc(ht->mask + 1, sizeof(ht_entry_t)); + ht->refcount = 1; + return ht; +} + +/* free every used key (and value, when isstr is set) and the slot array; ht itself is not freed */ static ht_t *ht_uninit(ht_t *ht) +{ + ht_entry_t *entry; + + for (entry = ht->table; entry != ht->table + ht->mask + 1; entry++) + if (ht_isused(entry)) { + if (ht->isstr) + free(entry->value); + free(entry->key); + } + free(ht->table); + return ht; +} + +/* allocate and initialize a new table; NOTE(review): the malloc() result is passed to ht_init() unchecked */ ht_t *ht_alloc(int isstr) +{ + ht_t *ht; + + ht = malloc(sizeof(ht_t)); + return ht_init(ht, isstr); +} + +/* release all entries and the table structure itself */ void ht_free(ht_t *ht) +{ + ht_uninit(ht); + free(ht); +} + +/* drop all entries and reinitialize ht in place, keeping its isstr setting */ ht_t *ht_clear(ht_t *ht) +{ + ht_uninit(ht); + return ht_init(ht, ht->isstr); +} + +/* one lookup function to rule them all */ +/* returns the slot holding key, or the slot where key should be inserted when it is absent. NOTE(review): ht_isempty() as defined in ht.h is also true for deleted slots, so probing stops at the first deleted slot and the free_entry bookkeeping below cannot trigger - confirm this is intended */ static ht_entry_t *ht_lookup(ht_t *ht, const char *key, unsigned int hash) +{ + unsigned int mask; + unsigned int i; + unsigned int j; + ht_entry_t *table; + ht_entry_t *entry; + ht_entry_t *free_entry; + + mask = ht->mask; + i = hash; + table = ht->table; + assert(ht->table); + entry = table + (i & mask); + if (ht_isempty(entry) || !strcmp(entry->key, key)) + return entry; + /* free_entry: first deleted entry for insert */ + free_entry = ht_isdeleted(entry) ? entry : NULL; + assert(ht->fill <= ht->mask); + for (JUMP_FIRST(i, j); ; JUMP(i, j)) { + entry = table + (i & mask); + if (ht_isempty(entry)) + return (free_entry == NULL) ?
entry : free_entry; + if (entry->hash == hash && !strcmp(entry->key, key)) + return entry; + if (ht_isdeleted(entry) && free_entry == NULL) + free_entry = entry; + } +} + +/* for resize: no deleted entries in ht, entry->key is not in ht, no strdup */ +static void cleaninsert(ht_t *ht, const ht_entry_t *entry) +{ + unsigned int i; + unsigned int j; + ht_entry_t *newentry; + + i = entry->hash; + newentry = ht->table + (i & ht->mask); + if (!ht_isempty(newentry)) + for (JUMP_FIRST(i, j); !ht_isempty(newentry); JUMP(i, j)) + newentry = ht->table + (i & ht->mask); + ++ht->fill; + ++ht->used; + memcpy(newentry, entry, sizeof(ht_entry_t)); +} + +/* rebuild the table with the smallest power-of-two size that is at least hint (and at least twice the used count), re-inserting only the used entries; the calloc() result is not checked here */ ht_t *ht_resize(ht_t *ht, unsigned int hint) +{ + unsigned int newsize; + unsigned int oldused; + ht_entry_t *oldtable, *newtable, *entry; + + oldused = ht->used; + if (hint < oldused << 1) + hint = oldused << 1; + assert(hint <= HT_MAXSIZE && hint > oldused); + for (newsize = HT_MINSIZE; newsize < hint; newsize <<= 1); + newtable = calloc(newsize, sizeof(ht_entry_t)); + oldtable = ht->table; + ht->mask = newsize - 1; + ht->fill = 0; + ht->used = 0; + ht->table = newtable; + /* stop as soon as every used entry has been moved instead of scanning the whole old table */ for (entry = oldtable; oldused > 0; ++entry) + if (ht_isused(entry)) { + --oldused; + cleaninsert(ht, entry); + } + free(oldtable); + return ht; +} + +void *ht_get(ht_t *ht, const char *key) +{ + ht_entry_t *entry; + + entry = ht_lookup(ht, key, keyhash(key)); + return ht_isused(entry) ? entry->value : NULL; +} + +void *ht_insert(ht_t *ht, const char *key, void *value) +{ + unsigned int hash; + ht_entry_t *entry; + + hash = keyhash(key); + entry = ht_lookup(ht, key, hash); + if (ht_isused(entry)) + return entry->value; + if (ht_isempty(entry)) + ++ht->fill; + ++ht->used; + entry->hash = hash; + entry->key = strclone(key); + entry->value = ht->isstr ?
strclone(value) : value; + checkfill(ht); + return NULL; +} + +const char *ht_set(ht_t *ht, const char *key, void *value) +{ + unsigned int hash; + ht_entry_t *entry; + char *k; + + hash = keyhash(key); + entry = ht_lookup(ht, key, hash); + if (ht_isused(entry)) { + if (ht->isstr) { + free(entry->value); + entry->value = strclone(value); + } else + entry->value = value; + return entry->key; + } + if (ht_isempty(entry)) + ++ht->fill; + ++ht->used; + entry->hash = hash; + /* keep the key pointer in k: checkfill() may resize the table, after which entry no longer points into it */ entry->key = k = strclone(key); + entry->value = ht->isstr ? strclone(value) : value; + checkfill(ht); + return k; +} + +const char *ht_del(ht_t *ht, const char *key) +{ + ht_entry_t *entry; + + entry = ht_lookup(ht, key, keyhash(key)); + if (!ht_isused(entry)) + return NULL; + --ht->used; + free(entry->key); + if (ht->isstr) + free(entry->value); + /* mark the slot as deleted; fill is left unchanged */ entry->key = ht_deleted_key; + return ht_deleted_key; +} + +ht_entry_t *ht_first(const ht_t *ht) +{ + ht_entry_t *entry = 0; + + if (ht->used) + for (entry = ht->table; !ht_isused(entry); ++entry); + return entry; +} + +ht_entry_t *ht_next(const ht_t *ht, ht_entry_t *entry) +{ + while (++entry != ht->table + ht->mask + 1) + if (ht_isused(entry)) + return entry; + return 0; +} diff --git a/scconfig/src/default/ht.h b/scconfig/src/default/ht.h new file mode 100644 index 0000000..ff9a61b --- /dev/null +++ b/scconfig/src/default/ht.h @@ -0,0 +1,51 @@ +#ifndef STR_HT_H +#define STR_HT_H + +/* char * -> void * open addressing hashtable */ +/* keys and values are strdupped (strcloned) */ + +/* slot states are encoded in the key pointer: NULL = never used, ht_deleted_key = deleted, anything else = used; note that ht_isempty() is true for both NULL and deleted slots */ +#define ht_deleted_key ((char *)1) +#define ht_isused(e) ((e)->key && (e)->key != ht_deleted_key) +#define ht_isempty(e) (((e)->key == NULL) || (e)->key == ht_deleted_key) +#define ht_isdeleted(e) ((e)->key == ht_deleted_key) + +typedef struct { + unsigned int hash; + char *key; + void *value; +} ht_entry_t; + +typedef struct { + unsigned int mask; + unsigned int fill; + unsigned int used; + int isstr; + ht_entry_t *table; + int refcount; +} ht_t; + +ht_t *ht_alloc(int
isstr); +void ht_free(ht_t *ht); +ht_t *ht_clear(ht_t *ht); +ht_t *ht_resize(ht_t *ht, unsigned int hint); + +/* value of ht[key], NULL if key is empty or deleted */ +void *ht_get(ht_t *ht, const char *key); +/* ht[key] = value and return NULL or return ht[key] if key is already used */ +void *ht_insert(ht_t *ht, const char *key, void *value); +/* ht[key] = value and return a pointer to the strdupped key */ +const char *ht_set(ht_t *ht, const char *key, void *value); +/* delete key and return ht_deleted_key or NULL if key was not used */ +const char *ht_del(ht_t *ht, const char *key); + +/* iteration */ +#define foreach(ht, e) \ + for (e = (ht)->table; e != (ht)->table + (ht)->mask + 1; e++) \ + if (ht_isused(e)) + +/* first used (useful for iteration) NULL if empty */ +ht_entry_t *ht_first(const ht_t *ht); +/* next used (useful for iteration) NULL if there is no more used */ +ht_entry_t *ht_next(const ht_t *ht, ht_entry_t *entry); + +#endif diff --git a/scconfig/src/default/lib_compile.c b/scconfig/src/default/lib_compile.c new file mode 100644 index 0000000..12664fc --- /dev/null +++ b/scconfig/src/default/lib_compile.c @@ -0,0 +1,332 @@ +/* + scconfig - library functions for compiling and running test code + Copyright (C) 2009 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+    Project page: http://repo.hu/projects/scconfig
+    Contact via email: scconfig [at] igor2.repo.hu
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <assert.h>
+#include "log.h"
+#include "libs.h"
+#include "db.h"
+#include "dep.h"
+
+/*
+#define KEEP_TEST_SRCS
+*/
+
+int cross_blind = 0;
+
+
+/* Build a malloc()'d flag string from the caller supplied input and the
+   value of db node:
+    - input == NULL: copy of the node value ("" if the node is not set)
+    - input starts with '+': node value, a space, then input without the '+';
+      if the node is not set, input without the '+'
+    - anything else: copy of input
+   The caller free()s the result. */
+static char *clone_flags(const char *input, const char *node)
+{
+	char *output;
+	const char *s;
+	int len;
+
+	if (input == NULL) {
+		s = get(node);
+		return strclone(s != NULL ? s : "");
+	}
+
+	if (*input != '+')
+		return strclone(input);
+
+	s = get(node);
+	if (s == NULL)
+		return strclone(input + 1); /* nothing to append to: drop the '+' marker, it is not a real flag */
+
+	len = strlen(s);
+	output = malloc(len + strlen(input) + 4);
+	memcpy(output, s, len);
+	output[len] = ' ';
+	strcpy(output + len + 1, input + 1);
+	return output;
+}
+
+/* Compile fn_input into an executable using exactly the flags given (no
+   defaults are merged in). *fn_output selects the suffix of the output file
+   name (NULL means the db value of sys/ext_exe) and is replaced with the
+   newly allocated output file name. cc == NULL means the db value of cc/cc.
+   NULL cflags/ldflags are treated as empty. Compiler messages are merged
+   into the log. Returns the value of system(). */
+int compile_file_raw(int logdepth, const char *fn_input, char **fn_output, const char *cc, const char *cflags, const char *ldflags)
+{
+	char *cmd;
+	char *cc_esc, *fn_input_esc, *fn_output_esc, *temp_out_esc, *temp_out;
+	const char *shell;
+	int ret;
+
+	temp_out = tempfile_new(".out");
+
+	if (*fn_output == NULL)
+		*fn_output = tempfile_new(get("sys/ext_exe"));
+	else
+		*fn_output = tempfile_new(*fn_output);
+	unlink(*fn_output);
+
+	/* passing NULL to %s is undefined */
+	if (cflags == NULL)
+		cflags = "";
+	if (ldflags == NULL)
+		ldflags = "";
+	shell = get("/host/sys/shell");
+	if (shell == NULL)
+		shell = "";
+
+	cc_esc = shell_escape_dup(cc == NULL ? get("cc/cc") : cc);
+	fn_input_esc = shell_escape_dup(fn_input);
+	fn_output_esc = shell_escape_dup(*fn_output);
+	temp_out_esc = shell_escape_dup(temp_out);
+
+	/* size the command from its parts: flags and paths are not bounded */
+	cmd = malloc(strlen(shell) + strlen(cc_esc) + strlen(cflags) + strlen(fn_input_esc) + strlen(ldflags) + strlen(fn_output_esc) + strlen(temp_out_esc) + 32);
+	sprintf(cmd, "%s \"%s %s %s %s -o %s 2>&1\" >%s", shell, cc_esc, cflags, fn_input_esc, ldflags, fn_output_esc, temp_out_esc);
+
+	free(cc_esc);
+	free(fn_input_esc);
+	free(fn_output_esc);
+	free(temp_out_esc);
+
+	logprintf(logdepth, "compile: '%s'\n", cmd);
+	ret = system(cmd);
+	free(cmd);
+	log_merge(logdepth + 1, temp_out);
+#ifndef KEEP_TEST_SRCS
+	unlink(temp_out);
+#endif
+	free(temp_out);
+	logprintf(logdepth, "compile result: %d\n", ret);
+
+	return ret;
+}
+
+/* Same as compile_file_raw(), but cflags and ldflags are first combined
+   with the db values of cc/cflags and cc/ldflags (see clone_flags()). */
+int compile_file(int logdepth, const char *fn_input, char **fn_output, const char *cc, const char *cflags, const char *ldflags)
+{
+	int ret;
+	char *ldflags_, *cflags_;
+
+	cflags_ = clone_flags(cflags, "cc/cflags");
+	ldflags_ = clone_flags(ldflags, "cc/ldflags");
+
+	ret = compile_file_raw(logdepth, fn_input, fn_output, cc, cflags_, ldflags_);
+
+	free(cflags_);
+	free(ldflags_);
+
+	return ret;
+}
+
+/* Dump testcode into a temporary .c file and compile it with
+   compile_file(); the temporary source is removed afterwards unless
+   KEEP_TEST_SRCS is defined. */
+int compile_code(int logdepth, const char *testcode, char **fn_output, const char *cc, const char *cflags, const char *ldflags)
+{
+	char *temp_in;
+	int ret;
+
+	require("sys/ext_exe", logdepth, 1);
+
+	assert(testcode != NULL);
+	assert(fn_output != NULL);
+
+	temp_in = tempfile_dump(testcode, ".c");
+	ret = compile_file(logdepth, temp_in, fn_output, cc, cflags, ldflags);
+#ifndef KEEP_TEST_SRCS
+	unlink(temp_in);
+#endif
+	free(temp_in);
+
+	return ret;
+}
+
+/* Same as compile_code(), but compiles with compile_file_raw(): the flags
+   are used as given. */
+int compile_code_raw(int logdepth, const char *testcode, char **fn_output, const char *cc, const char *cflags, const char *ldflags)
+{
+	char *temp_in;
+	int ret;
+
+	require("sys/ext_exe", logdepth, 1);
+
+	assert(testcode != NULL);
+	assert(fn_output != NULL);
+
+	temp_in = tempfile_dump(testcode, ".c");
+	ret = compile_file_raw(logdepth, temp_in, fn_output, cc, cflags, ldflags);
+#ifndef KEEP_TEST_SRCS
+	unlink(temp_in);
+#endif
+	free(temp_in);
+
+	return ret;
+}
+
+/* Return a malloc()'d copy of in with the first character of the db value
+   sys/shell_escape_char inserted before the escape character itself and
+   before every character that is neither alphanumeric nor one of / _ - .
+   If no escape character is known, a plain copy is returned. */
+char *shell_escape_dup(const char *in)
+{
+	char *o, *out;
+	const char *i;
+	const char *esc = get("sys/shell_escape_char");
+
+	/* in the early phase, before detecting the shell, this happens;
+	   an empty escape string would insert string terminators */
+	if ((esc == NULL) || (*esc == '\0'))
+		return strclone(in);
+
+	/* worst case every character gets an escape prefix */
+	out = malloc(strlen(in)*2+1);
+	for(i = in, o = out; *i != '\0'; i++) {
+		int safe;
+
+		/* isalnum() is undefined for negative values: go through unsigned char */
+		safe = isalnum((unsigned char)*i) || (*i == '/') || (*i == '_') || (*i == '-') || (*i == '.');
+		if ((*i == *esc) || !safe)
+			*o++ = *esc;
+		*o++ = *i;
+	}
+	*o = '\0';
+	return out;
+}
+
+/* Run cmd_ through system(), prefixed with the db value of sys/emu.
+   stderr is merged into the log; if stdout_saved is not NULL, it is set to
+   the malloc()'d stdout of the command on success (exit status 0) and to
+   NULL otherwise. Returns the value of system(), or 0 without running
+   anything in blind cross compilation mode or when sys/emu is not set. */
+int run(int logdepth, const char *cmd_, char **stdout_saved)
+{
+	char *cmd;
+	char *fn_out, *temp_out;
+	char *fn_out_esc, *temp_out_esc;
+	int ret;
+	const char *emu;
+
+	assert(cmd_ != NULL);
+
+	/* blind cross compiling mode means we always assume success */
+	if (cross_blind) {
+		if (stdout_saved != NULL)
+			*stdout_saved = NULL;
+		return 0;
+	}
+
+	emu = get("sys/emu");
+
+	/* emu == NULL means we need an emulator but we don't have one and
+	   we should pretend everything went well (and of course can't provide
+	   output.) */
+	if (emu == NULL) {
+		if (stdout_saved != NULL)
+			*stdout_saved = NULL;
+		return 0;
+	}
+
+	/* emu == false means we need an emulator and we don't want to pretend -> fatal */
+	if (strcmp(emu, sfalse) == 0) {
+		error("Trying to run unavailable emulator (db_cwd='%s')\n", db_cwd);
+		abort();
+	}
+
+	temp_out = tempfile_new(".out"); /* collects stderr */
+	fn_out = tempfile_new(""); /* collects stdout */
+
+	temp_out_esc = shell_escape_dup(temp_out);
+	fn_out_esc = shell_escape_dup(fn_out);
+	cmd = malloc(strlen(emu) + strlen(cmd_) + strlen(fn_out_esc) + strlen(temp_out_esc) + 32);
+	sprintf(cmd, "%s %s >%s 2>>%s", emu, cmd_, fn_out_esc, temp_out_esc);
+	free(temp_out_esc);
+	free(fn_out_esc);
+
+	logprintf(logdepth, "run: '%s'\n", cmd);
+	ret = system(cmd);
+	log_merge(logdepth + 1, temp_out);
+	unlink(temp_out);
+	free(temp_out);
+	logprintf(logdepth, "run result: %d\n", ret);
+	free(cmd);
+
+	if (stdout_saved != NULL) {
+		if (ret == 0) {
+			*stdout_saved = load_file(fn_out);
+			logprintf(logdepth, "stdout: '%s'\n", *stdout_saved);
+		}
+		else
+			*stdout_saved = NULL;
+	}
+
+	unlink(fn_out);
+	free(fn_out);
+	return ret;
+}
+
+/* Run cmd_ using the shell named by the db value sys/shell (through
+   run()); the command is put in double quotes if sys/shell_needs_quote is
+   true. Aborts if no shell is configured. Return value and stdout_saved
+   are the same as for run(). */
+int run_shell(int logdepth, const char *cmd_, char **stdout_saved)
+{
+	int ret;
+	char *cmd;
+	const char *emu;
+	const char *shell;
+
+	/* NOTE(review): run() reads "sys/emu" while this reads "sys/emulator";
+	   confirm which node name is intended */
+	emu = get("sys/emulator");
+	if (emu == NULL)
+		emu = "";
+
+	shell = get("sys/shell");
+	if (shell == NULL) {
+		error("No shell was specified (db_cwd='%s')\n", db_cwd);
+		abort();
+	}
+
+	/* cmd_ is passed on as is, so it is what the buffer is sized for */
+	cmd = malloc(strlen(emu) + strlen(shell) + strlen(cmd_) + 16);
+	if (istrue(get("sys/shell_needs_quote")))
+		sprintf(cmd, "%s %s \"%s\"", emu, shell, cmd_);
+	else
+		sprintf(cmd, "%s %s %s", emu, shell, cmd_);
+
+	ret = run(logdepth, cmd, stdout_saved);
+	free(cmd);
+	return ret;
+}
+
+
+/* Compile testcode with compile_code() and, if that succeeds, run the
+   resulting executable with run(). The executable is removed afterwards.
+   Returns the compilation result if it is non-zero, else the run result. */
+int compile_run(int logdepth, const char *testcode, const char *cc, const char *cflags, const char *ldflags, char **stdout_saved)
+{
+	int ret;
+	char *fn_output = NULL;
+
+	ret = compile_code(logdepth+1, testcode, &fn_output, cc, cflags, ldflags);
+
+	if (ret == 0) {
+		char *fn_output_esc = shell_escape_dup(fn_output);
+		ret = run(logdepth+1, fn_output_esc, stdout_saved);
+		free(fn_output_esc);
+	}
+
+	if (fn_output != NULL) {
+		unlink(fn_output);
+		free(fn_output);
+	}
+	return ret;
+}
+
+/* Dump script into a temporary file with the given suffix and run it as
+   "interpreter file" through run(); the temporary file is removed
+   afterwards. Returns the result of run(). */
+int run_script(int logdepth, const char *interpreter, const char *script, const char *suffix, char **out)
+{
+	char *temp, *cmd;
+	int res;
+
+	temp = tempfile_dump(script, suffix);
+	cmd = malloc(strlen(temp) + strlen(interpreter) + 4);
+	sprintf(cmd, "%s %s", interpreter, temp);
+
+	res = run(logdepth, cmd, out);
+
+	unlink(temp);
+	free(temp);
+	free(cmd);
+	return res;
+}
+
diff --git a/scconfig/src/default/lib_file.c b/scconfig/src/default/lib_file.c
new file mode 100644
index 0000000..86af971
--- /dev/null
+++ b/scconfig/src/default/lib_file.c
@@ -0,0 +1,240 @@
+/*
+    scconfig - library to query files and directories
+    Copyright (C) 2009..2012 Tibor Palinkas
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+    Project page: http://repo.hu/projects/scconfig
+    Contact via email: scconfig [at] igor2.repo.hu
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "db.h"
+#include "libs.h"
+#include "log.h"
+#include "dep.h"
+
+/* Return the size of the file called name as reported by stat(), or -1 if
+   stat() fails. */
+int file_size(const char *name)
+{
+	struct stat buf;
+	if (stat(name, &buf) != 0)
+		return -1;
+	return buf.st_size;
+}
+
+/* Create a new directory named <sys/tmp>scc_<random><suffix> using the
+   mkdir tool from the db and return its malloc()'d name. Tries up to 128
+   random names; aborts if none of them could be created. */
+char *tempdir_new(int logdepth, const char *suffix)
+{
+	char s[1024];
+	char *cmd;
+	const char *tmp;
+	const char *mkdir;
+	unsigned int rn, n;
+
+	require("sys/tmp", logdepth+1, 1);
+	tmp = get("sys/tmp");
+
+	/* sum the lengths instead of subtracting them from sizeof(s): the
+	   subtraction is unsigned and would wrap around for a long tmp */
+	if (strlen(tmp) + strlen(suffix) + 32 > sizeof(s)) {
+		fprintf(stderr, "Not enough room for creating temporary file name\n");
+		abort();
+	}
+
+	require("fstools/mkdir", logdepth+1, 1);
+	mkdir = get("fstools/mkdir");
+
+	for(n = 0; n < 128; n++) {
+		rn = rand() % 100000;
+		sprintf(s, "%sscc_%u%s", tmp, rn, suffix);
+		if (!exists(s)) {
+			char *s_esc = shell_escape_dup(s);
+			cmd = malloc(strlen(s_esc) + strlen(mkdir) + 16);
+			sprintf(cmd, "%s %s", mkdir, s_esc);
+			run_shell(logdepth+1, cmd, NULL);
+			free(s_esc);
+			free(cmd);
+			if (is_dir(s))
+				return strclone(s);
+		}
+	}
+	error("Couldn't find a suitable temp dir name\n");
+	abort();
+}
+
+/* Create a new empty file named </host/sys/tmp>scc_<random><suffix> and
+   return its malloc()'d name, or NULL if none of 128 random names could be
+   created. */
+char *tempfile_new_noabort(const char *suffix)
+{
+	char s[1024];
+	const char *tmp;
+	unsigned int rn, n;
+	FILE *f;
+
+
+	require("/host/sys/tmp", 0, 1);
+	tmp = get("/host/sys/tmp");
+	/* see tempdir_new() for why this is a sum */
+	if (strlen(tmp) + strlen(suffix) + 32 > sizeof(s)) {
+		fprintf(stderr, "tempfile_new_noabort(): not enough room for creating temporary file name\n");
+		abort();
+	}
+
+	for(n = 0; n < 128; n++) {
+		rn = rand() % 100000;
+		sprintf(s, "%sscc_%u%s", tmp, rn, suffix);
+		if (!is_file(s)) { /* can not test for exists() because that would recurse is_dir */
+			f = fopen(s, "w");
+			if (f != NULL) {
+				fclose(f);
+				return strclone(s);
+			}
+		}
+	}
+	return NULL;
+}
+
+/* Same as tempfile_new_noabort(), but aborts instead of returning NULL. */
+char *tempfile_new(const char *suffix)
+{
+	char *tmp;
+
+	tmp = tempfile_new_noabort(suffix);
+	if (tmp == NULL) {
+		error("Couldn't find a suitable temp file name\n");
+		abort();
+	}
+	return tmp;
+}
+
+/* Create a new temporary file with the given suffix, write testcode into
+   it and return the malloc()'d file name. Aborts if the file can not be
+   opened for writing. */
+char *tempfile_dump(const char *testcode, const char *suffix)
+{
+	char *fn;
+	FILE *f;
+
+	fn = tempfile_new(suffix);
+	f = fopen(fn, "w");
+	if (f == NULL) {
+		/* fail loudly instead of passing NULL to fprintf() */
+		error("Couldn't open temp file '%s' for write\n", fn);
+		abort();
+	}
+	fprintf(f, "%s", testcode);
+	fclose(f);
+	return fn;
+}
+
+/* Return the content of the file called name as a malloc()'d, '\0'
+   terminated string. The result is an empty string if the file is empty or
+   can not be stat()'d or opened. */
+char *load_file(const char *name)
+{
+	int size;
+	char *content;
+	FILE *f;
+
+	size = file_size(name);
+	if (size > 0) {
+		content = malloc(size+1);
+		*content = '\0';
+		f = fopen(name, "r");
+		if (f != NULL) {
+			int len = fread(content, 1, size, f);
+			if (len < 0)
+				len = 0;
+			content[len] = '\0';
+			fclose(f);
+		}
+	}
+	else {
+		content = malloc(1);
+		*content = '\0';
+	}
+	return content;
+}
+
+/* Return non-zero if the shell can "cd" into path (run_shell() returns 0). */
+int is_dir(const char *path)
+{
+	char *tmp, *path_esc;
+	int ret;
+
+	require("sys/shell", 0, 1);
+
+	path_esc = shell_escape_dup(path);
+	tmp = malloc(strlen(path_esc) + 16);
+	sprintf(tmp, "cd %s", path_esc);
+	ret = run_shell(0, tmp, NULL);
+	free(tmp);
+	free(path_esc);
+	return !ret;
+}
+
+/* Return non-zero if stat() succeeds on path (see file_size()). */
+int is_file(const char *path)
+{
+	return file_size(path) >= 0;
+}
+
+/* Return non-zero if path passes is_file() or is_dir(). */
+int exists(const char *path)
+{
+	return is_file(path) || is_dir(path);
+}
+
+
+/* Return non-zero if dir/file passes is_file() or is_dir(). */
+int exists_in(const char *dir, const char *file)
+{
+	char *path;
+	int ret;
+
+	path = malloc(strlen(dir) + strlen(file) + 5);
+	sprintf(path, "%s/%s", dir, file);
+	ret = is_file(path) || is_dir(path);
+	free(path);
+	return ret;
+}
+
+/* Return a pointer to the last '/' of path or, if there is none, to the
+   last '\\'; NULL if path contains neither. */
+const char *file_name_ptr(const char *path)
+{
+	const char *s;
+	s = str_rchr((char *)path, '/');
+	if (s == NULL)
+		s = str_rchr((char *)path, '\\');
+	return s;
+}
+
+/* Return a malloc()'d copy of the part of path after the last separator
+   found by file_name_ptr(), or of the whole path if there is none. */
+char *file_name(const char *path)
+{
+	const char *s;
+	s = file_name_ptr(path);
+	if (s == NULL)
+		return strclone(path);
+	s++;
+	return strclone(s);
+}
+
+/* Return a malloc()'d copy of the part of path before the last separator
+   found by file_name_ptr(), or an empty string if there is none. */
+char *dir_name(const char *path)
+{
+	char *s, *r;
+	s = strclone(path);
+	r = (char *)file_name_ptr(s);
+	if (r == NULL) {
+		free(s);
+		return strclone("");
+	}
+	*r = '\0'; /* cut the copy at the separator */
+	return s;
+}
+
+/* Open path for append and close it, creating the file if needed.
+   Returns 0 on success, -1 if the file can not be opened. */
+int touch_file(const char *path)
+{
+	FILE *f;
+	f = fopen(path, "a");
+	if (f == NULL)
+		return -1;
+	fclose(f);
+	return 0;
+}
diff --git a/scconfig/src/default/lib_filelist.c b/scconfig/src/default/lib_filelist.c
new file mode 100644
index 0000000..54256a0
--- /dev/null
+++ b/scconfig/src/default/lib_filelist.c
@@ -0,0 +1,338 @@
+/*
+    scconfig - library for listing files in a directory
+    Copyright (C) 2009..2012 Tibor Palinkas
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+    Project page: http://repo.hu/projects/scconfig
+    Contact via email: scconfig [at] igor2.repo.hu
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "db.h"
+#include "libs.h"
+#include "log.h"
+#include "dep.h"
+
+/* Remove the test directory dir using the rm tool from the db and free()
+   dir. Takes ownership of dir in every case; dir == NULL is a no-op. */
+static void destroy_testdir(int logdepth, char *dir)
+{
+	const char *rm;
+	char *cmd, *dir_esc;
+
+	/* check first: the log calls below print dir */
+	if (dir == NULL)
+		return;
+
+	rm = get("fstools/rm");
+	if (rm == NULL) {
+		logprintf(logdepth, "CAN NOT delete test directory '%s': no rm available\n", dir);
+		free(dir);
+		return;
+	}
+
+	logprintf(logdepth, "deleting test directory '%s'\n", dir);
+
+	/* the escaped name is what gets printed, and it can be longer than dir */
+	dir_esc = shell_escape_dup(dir);
+	cmd = malloc(strlen(dir_esc) + strlen(rm) + 4);
+	sprintf(cmd, "%s %s", rm, dir_esc);
+	run_shell(0, cmd, NULL);
+	free(cmd);
+	free(dir);
+	free(dir_esc);
+}
+
+/* Create a temporary directory containing the files file1, file2 and the
+   directories dir1, dir2. Returns the malloc()'d name of the directory, or
+   NULL (after removing the directory) if any of them can not be created. */
+static char *create_testdir(int logdepth)
+{
+	char *dir, *fn, *cmd;
+	const char *mkdir, *psep;
+	int n;
+	logprintf(logdepth, "creating test directory\n");
+
+	dir = tempdir_new(logdepth+1, "");
+	logprintf(logdepth, "sandbox is: '%s'\n", dir);
+
+	psep = get("sys/path_sep");
+	fn = malloc(strlen(dir) + strlen(psep) + 32);
+	for(n = 0; n < 2; n++) {
+		FILE *f;
+		int created;
+
+		sprintf(fn, "%s%sfile%d", dir, psep, n+1);
+		f = fopen(fn, "w");
+		if (f != NULL) {
+			fclose(f);
+			created = is_file(fn);
+		}
+		else
+			created = 0; /* without the file the listing test can not pass */
+
+		if (!created) {
+			logprintf(logdepth, "Can not create file %s\n", fn);
+			free(fn);
+			destroy_testdir(logdepth, dir);
+			return NULL;
+		}
+	}
+
+	mkdir = get("fstools/mkdir");
+
+	for(n = 0; n < 2; n++) {
+		char *fn_esc;
+		int failed;
+
+		sprintf(fn, "%s%sdir%d", dir, psep, n+1);
+		fn_esc = shell_escape_dup(fn);
+		/* size the command from what is printed into it */
+		cmd = malloc(strlen(mkdir) + strlen(fn_esc) + 4);
+		sprintf(cmd, "%s %s", mkdir, fn_esc);
+		free(fn_esc);
+		failed = run_shell(logdepth+1, cmd, NULL) || (!is_dir(fn));
+		free(cmd);
+		if (failed) {
+			logprintf(logdepth, "Can not create directory %s\n", fn);
+			free(fn);
+			destroy_testdir(logdepth, dir);
+			return NULL;
+		}
+	}
+	free(fn);
+	return dir;
+}
+
+/* Check a file list against the content made by create_testdir(): returns
+   1 if argv holds exactly dir1, dir2, file1 and file2, each once; 0
+   otherwise. */
+static int test(int logdepth, int argc, char *argv[])
+{
+	int dir[2], file[2], n;
+	int *arr, idx;
+
+	for(n = 0; n < 2; n++) {
+		dir[n] = 0;
+		file[n] = 0;
+	}
+
+	/* count the list of files, increase arrays by hit */
+	for(n = 0; n < argc; n++) {
+		arr = NULL;
+		if (strncmp(argv[n], "dir", 3) == 0) { arr = dir; idx = atoi(argv[n]+3); }
+		if (strncmp(argv[n], "file", 4) == 0) { arr = file; idx = atoi(argv[n]+4); }
+		if (arr == NULL) {
+			logprintf(logdepth, "test fails: unknown existing file on the list: '%s'\n", argv[n]);
+			return 0;
+		}
+		idx--;
+		if ((idx < 0) || (idx > 1)) {
+			logprintf(logdepth, "test fails: file name changed: '%s'\n", argv[n]);
+			return 0;
+		}
+		arr[idx]++;
+	}
+
+	/* check if every item was found exactly once */
+	for(n = 0; n < 2; n++) {
+		if ((dir[n] != 1) || (file[n] != 1)) {
+			/* name the entry whose count is off; entries are numbered from 1 */
+			logprintf(logdepth, "test fails: %s%d not found \n", (dir[n] != 1) ? "dir" : "file", n+1);
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+/* Split the output of a list command (out, modified in place) into entry
+   names. method starting with 'w' splits on whitespace, anything else on
+   lines. For each item a leading dir and leading path separators are
+   removed and the name is cut at the next path separator; it is kept only
+   if it differs from the previously kept name and exists in dir. The
+   malloc()'d array and its length are returned in *argv and *argc (*argv
+   is NULL if nothing was kept). */
+static void filelist_extract(char *out, const char *dir, const char *method, int *argc, char ***argv)
+{
+	char *s, sep, *start, *end;
+	int len, allocated = 0, count = 0;
+	char **arr = NULL;
+	const char *psep;
+
+	psep = get("sys/path_sep");
+
+	len = strlen(dir);
+
+	/* uniform separator */
+	if (*method == 'w') {
+		/* if word splitting then convert newlines to spaces and convert tabs to spaces */
+		for(s = out; *s != '\0'; s++) {
+			if ((*s == '\n') || (*s == '\r') || (*s == '\t'))
+				*s = ' ';
+		}
+		sep = ' ';
+	}
+	else {
+		for(s = out; *s != '\0'; s++) {
+			if (*s == '\r')
+				*s = '\n';
+		}
+		sep = '\n';
+	}
+
+	/* only items terminated by sep are processed */
+	start = out;
+	while((s = str_chr(start, sep)) != NULL) {
+		*s = '\0';
+		if (strncmp(dir, start, len) == 0)
+			start += len;
+		while(*start == *psep)
+			start++;
+
+		if (*start != '\0') {
+			end = str_chr(start, *psep);
+			if (end != NULL)
+				*end = '\0';
+
+			/* add only if not the same as previous and exists */
+			if ((!((count > 0) && (strcmp(arr[count - 1], start) == 0))) && (exists_in(dir, start))) {
+
+				if (count >= allocated) {
+					allocated = count + 32;
+					arr = realloc(arr, sizeof(char *) * allocated);
+				}
+				arr[count] = strclone(start);
+				count++;
+			}
+		}
+
+		start = s+1;
+		while(*start == sep) start++;
+	}
+	*argc = count;
+	*argv = arr;
+}
+
+/* Free the first *argc strings of *argv and the array itself, then reset
+   *argc to 0 and *argv to NULL. No-op if *argv is NULL. */
+void filelist_free(int *argc, char ***argv)
+{
+	int n;
+
+	if (*argv == NULL)
+		return;
+
+	for(n = 0; n < *argc; n++)
+		free((*argv)[n]);
+	free(*argv);
+	*argv = NULL; /* do not leave a dangling pointer with the caller */
+	*argc = 0;
+}
+
+/* Return the malloc()'d command made by printing dir into the list_cmd
+   template (which contains a single %s). */
+static char *filelist_asmcmd(const char *dir, const char *list_cmd)
+{
+	char *cmd;
+
+	cmd = malloc(strlen(dir) + strlen(list_cmd) + 32);
+	sprintf(cmd, list_cmd, dir);
+	return cmd;
+}
+
+/* Try listing the sandbox dir with list_cmd, splitting the output by
+   method. If the result passes test(), store the command and the method in
+   /internal/filelist and return 1; otherwise return 0. */
+static int try(int logdepth, const char *dir, const char *list_cmd, const char *method)
+{
+	char *cmd, *out = NULL, *dir_esc;
+	int argc, res = 0; /* no output means the command did not work */
+	char **argv;
+
+	dir_esc = shell_escape_dup(dir);
+	cmd = filelist_asmcmd(dir_esc, list_cmd);
+	free(dir_esc);
+	logprintf(logdepth, "trying '%s'...\n", cmd);
+
+	run_shell(logdepth+1, cmd, &out);
+	if (out != NULL) {
+		filelist_extract(out, dir, method, &argc, &argv);
+		res = test(logdepth+1, argc, argv);
+		filelist_free(&argc, &argv);
+		free(out);
+	}
+
+	if (res) {
+		logprintf(logdepth+1, "Works.");
+		put("/internal/filelist/cmd", list_cmd);
+		put("/internal/filelist/method", method);
+		report("OK ('%s' with %s split)\n", list_cmd, method);
+	}
+
+	free(cmd);
+	return res;
+}
+
+/* Detection entry point: find a command that can list the content of a
+   directory by trying a set of candidates on a sandbox directory. Works
+   in the /host subtree of the db and restores the original db_cwd before
+   returning. Returns 0 if a working command was found, 1 otherwise. */
+int find_filelist(const char *name, int logdepth, int fatal)
+{
+	char *dir;
+	char *old_cwd;
+	int ret;
+
+	old_cwd = strclone(db_cwd);
+	db_cd("/host");
+
+	require("fstools/mkdir", logdepth, fatal);
+	require("fstools/rm", logdepth, fatal);
+
+
+	report("Checking for filelist... ");
+	logprintf(logdepth, "find_filelist: trying to find file listing...\n");
+	logdepth++;
+
+
+	dir = create_testdir(logdepth);
+	if (dir == NULL) {
+		report("Failed to creat sandbox\n");
+		ret = 1;
+		goto end;
+	}
+
+	if (
+			try(logdepth, dir, "ls %s", "line") ||                       /* should return one file name per line since the output is redirected */
+			try(logdepth, dir, "ls -1 %s", "line") ||                    /* try to force one file name per line */
+			try(logdepth, dir, "ls --format=single-column %s", "line") || /* for gnu ls */
+			try(logdepth, dir, "find %s", "line") ||                     /* if ls fails, we try find */
+			try(logdepth, dir, "ls %s", "word") ||                       /* if that fails too, ls may still have a list in multiple columns */
+			try(logdepth, dir, "dir %s", "word") ||                      /* or we are on windows where we need to use dir maybe */
+			try(logdepth, dir, "echo %s/*", "word")) {                   /* or on a system without ls, dir or anything alike, but shell globbing may still work */
+
+		destroy_testdir(logdepth, dir);
+		ret = 0;
+		goto end;
+	}
+
+	destroy_testdir(logdepth, dir);
+	ret = 1;
+	end:;
+	db_cd(old_cwd);
+	free(old_cwd);
+	return ret;
+}
+
+
+/* List the entries of dir using the command found by find_filelist().
+   On return *argv is a malloc()'d array of *argc names (release it with
+   filelist_free()); *argc is -1 and *argv is NULL if dir is not a
+   directory or the list command produced no output. */
+void filelist(int logdepth, const char *dir, int *argc, char ***argv)
+{
+	const char *list_cmd, *method;
+	char *cmd, *out = NULL, *dir_esc;
+	char *old_cwd;
+
+	old_cwd = strclone(db_cwd);
+	db_cd("/host");
+
+	/* make sure these are set to invalid for easier return in case we fail anywhere later */
+	*argc = -1;
+	*argv = NULL;
+
+	if (!is_dir(dir))
+		goto end;
+
+	require("/internal/filelist/cmd", logdepth, 1);
+	require("/internal/filelist/method", logdepth, 1);
+
+	list_cmd = get("/internal/filelist/cmd");
+	method = get("/internal/filelist/method");
+
+	dir_esc = shell_escape_dup(dir);
+	cmd = filelist_asmcmd(dir_esc, list_cmd);
+	free(dir_esc);
+	run_shell(logdepth+1, cmd, &out);
+	if (out != NULL) {
+		filelist_extract(out, dir, method, argc, argv);
+		logprintf(logdepth, "filelist: Getting list of files in %s\n", dir);
+		free(out);
+	}
+
+	free(cmd);
+	end:;
+	db_cd(old_cwd);
+	free(old_cwd);
+}
diff --git a/scconfig/src/default/lib_pkg_config.c b/scconfig/src/default/lib_pkg_config.c
new file mode 100644
index 0000000..8f81220
--- /dev/null
+++ b/scconfig/src/default/lib_pkg_config.c
@@ -0,0 +1,188 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <assert.h>
+#include "log.h"
+#include "libs.h"
+#include "db.h"
+#include "dep.h"
+#include "regex.h"
+
+/* Remove every match of the pattern in /arg/sys/pkg-config-zap from *str
+   (which is replaced by a newly allocated string each time). No-op if the
+   pattern is not set or does not compile. */
+static void zap(char **str)
+{
+	const char *pat = get("/arg/sys/pkg-config-zap");
+	char *n;
+
+	if (pat == NULL)
+		return;
+	if (re_comp(pat) != NULL)
+		return;
+	while (re_exec(*str)) {
+		n = re_subs_dup("");
+		free(*str);
+		*str = n;
+	}
+}
+
+/* Return the malloc()'d command line "confname opt pkgname". */
+static char *gen_config_cmd(const char *confname, const char *opt, const char *pkgname)
+{
+	char *cmd;
+
+	cmd = malloc(strlen(confname) + strlen(opt) + strlen(pkgname) + 4);
+	sprintf(cmd, "%s %s %s", confname, opt, pkgname);
+	return cmd;
+}
+
+/** run_gen_config:
+   run the config tool @confname on @pkgname:
+    - with `--cflags` if @cflags is not NULL, storing the result in @cflags (malloc()'d)
+    - with `--libs` if @ldflags is not NULL, storing the result in @ldflags (malloc()'d)
+   Returns 0 on success; on failure returns -1 and leaves both outputs
+   NULL, so that the caller has nothing to free.
+*/
+int run_gen_config(int logdepth, const char *confname, const char *pkgname, char **cflags, char **ldflags)
+{
+	char *cmd;
+	int failed;
+
+	/* start from a defined state so the outputs can be free()'d safely */
+	if (cflags != NULL)
+		*cflags = NULL;
+	if (ldflags != NULL)
+		*ldflags = NULL;
+
+	if (cflags != NULL) {
+		cmd = gen_config_cmd(confname, "--cflags", pkgname);
+		failed = (run(logdepth, cmd, cflags) != 0);
+		free(cmd);
+		if (failed) {
+			report("not found: %s --cflags failed.", confname);
+			logprintf(logdepth, "not found: %s --cflags failed.\n", confname);
+			return -1;
+		}
+		if (*cflags != NULL) {
+			zap(cflags);
+			strip(*cflags);
+		}
+	}
+
+	if (ldflags != NULL) {
+		cmd = gen_config_cmd(confname, "--libs", pkgname);
+		failed = (run(logdepth, cmd, ldflags) != 0);
+		free(cmd);
+		if (failed) {
+			report("not found: %s --libs failed.", confname);
+			logprintf(logdepth, "not found: %s --libs failed.\n", confname);
+			if (cflags != NULL) {
+				free(*cflags);
+				*cflags = NULL; /* the caller may free() it again */
+			}
+			return -1;
+		}
+		if (*ldflags != NULL) {
+			zap(ldflags);
+			strip(*ldflags);
+		}
+	}
+
+	return 0;
+}
+
+/* Return the name of the pkg-config executable: the value of
+   /arg/sys/pkg-config if set, else "pkg-config". */
+const char *pkg_config_name()
+{
+	const char *name;
+	name = get("/arg/sys/pkg-config");
+	if (name != NULL)
+		return name;
+	return "pkg-config"; /* fallback */
+}
+
+/** run_pkg_config_modversion:
+   run `pkg-config` on @pkgname:
+    - with `--modversion` if @modversion is not NULL, storing the result in @modversion (malloc()'d)
+   Returns 0 on success. @modversion may be NULL after a successful return
+   if run() produced no output.
+*/
+int run_pkg_config_modversion(int logdepth, const char *pkgname, char **modversion)
+{
+	char *cmd;
+	int failed;
+	const char *confname = pkg_config_name();
+
+	if (modversion != NULL) {
+		cmd = gen_config_cmd(confname, "--modversion", pkgname);
+		failed = (run(logdepth, cmd, modversion) != 0);
+		free(cmd);
+		if (failed) {
+			/*report("Module version not found: %s --modversion %s failed.", confname, pkgname);
+			logprintf(logdepth, "Module version not found: %s --modversion %s failed.\n", confname, pkgname); */
+			return -1;
+		}
+		/* run() can succeed without output (e.g. blind cross compilation) */
+		if (*modversion != NULL) {
+			zap(modversion);
+			strip(*modversion);
+		}
+	}
+
+	return 0;
+}
+
+/** run_pkg_config_modversion_db:
+   run `pkg-config --modversion` on @pkgname to find module (or package) version
+   and store the result in @node/modversion
+   Returns 0 on success, -1 if the version could not be determined.
+*/
+int run_pkg_config_modversion_db(int logdepth, const char *node, const char *pkgname /*, char **modversion */ )
+{
+	char *modversion = NULL;
+	char *tmp;
+
+	if (run_pkg_config_modversion(logdepth, pkgname, &modversion) != 0) {
+		return -1;
+	}
+	if (modversion == NULL)
+		return -1; /* no output: there is no version to store */
+
+	/* Store the module version in node */
+	tmp = str_concat("/", node, "modversion", NULL);
+	put(tmp, modversion);
+	free(tmp);
+	free(modversion);
+
+	return 0;
+}
+
+/* Run run_gen_config() with the pkg-config executable from
+   pkg_config_name(). */
+int run_pkg_config(int logdepth, const char *pkgname, char **cflags, char **ldflags)
+{
+
+	return run_gen_config(logdepth, pkg_config_name(), pkgname, cflags, ldflags);
+}
+
+/* List the packages whose `pkg-config --list-all` line matches the regex
+   pkgpat. The output of pkg-config is cached for the life of the process.
+   On return *argv is a NULL terminated malloc()'d array of *argc strings
+   holding two malloc()'d entries per match: the first word of the line
+   and the result of re_subs_dup(""). *argc is 0 and *argv is NULL if
+   nothing matched or the list is not available. */
+void run_pkg_config_lst(int logdepth, const char *pkgpat, int *argc, char ***argv)
+{
+	char *end, *s, *next;
+	int n = 0, a = 0;
+	char **sf = NULL;
+	static const char *pkg_cfg_cache = NULL;
+	static char no_pkg_cfg;
+	char *list;
+
+	/* &no_pkg_cfg marks "tried before and failed" */
+	if (pkg_cfg_cache == &no_pkg_cfg)
+		goto error;
+
+	if (pkg_cfg_cache == NULL) {
+		char *cmd = str_concat(" ", pkg_config_name(), "--list-all", NULL);
+		run(logdepth, cmd, (char **) &pkg_cfg_cache);
+		free(cmd);
+		if (pkg_cfg_cache == NULL) {
+			pkg_cfg_cache = &no_pkg_cfg;
+			goto error;
+		}
+	}
+
+	if (re_comp(pkgpat) != NULL)
+		goto error;
+
+	/* work on a copy: the lines are cut up in place */
+	s = list = strclone(pkg_cfg_cache);
+	for (;;) {
+		while (isspace((unsigned char)*s))
+			s++;
+		if (*s == '\0')
+			break;
+		next = strpbrk(s, "\r\n");
+		if (next != NULL)
+			*next = '\0';
+		if (re_exec(s)) {
+			if ((n + 2) >= a) { /* n+2: make sure there's always room for the NULL at the end */
+				a += 16;
+				sf = realloc(sf, sizeof(char *) * a);
+			}
+			end = strpbrk(s, " \t");
+			if (end != NULL)
+				*end = '\0';
+
+			sf[n] = strclone(s);
+			sf[n + 1] = re_subs_dup("");
+/*			report("\ns='%s' sf='%s'\n", s, sf[n]);*/
+			n += 2;
+		}
+		if (next == NULL)
+			break; /* last line had no line terminator: nothing follows */
+		s = next + 1;
+	}
+
+	if (sf != NULL)
+		sf[n] = NULL;
+
+	free(list);
+error:;
+	*argc = n;
+	*argv = sf;
+	return;
+}
diff --git a/scconfig/src/default/lib_srctree.c b/scconfig/src/default/lib_srctree.c
new file mode 100644
index 0000000..17809f4
--- /dev/null
+++ b/scconfig/src/default/lib_srctree.c
@@ -0,0 +1,77 @@
+/*
+    scconfig - library to explore the source tree
+    Copyright (C) 2015 Tibor Palinkas
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+    Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "db.h" +#include "libs.h" +#include "log.h" +#include "dep.h" + + +char *svn_info(int logdepth, const char *dir, const char *key) +{ + char *cmd, *stdo = NULL; + char *res = NULL; + int keylen = strlen(key); + + cmd = str_concat(" ", "svn info", dir, NULL); + if (run_shell(logdepth, cmd, &stdo) == 0) { + char *line, *nline; + + /* check key against each line */ + for(line = stdo; line != NULL; line = nline) { + /* split line */ + nline = strpbrk(line, "\r\n"); + if (nline != NULL) { + *nline = '\0'; + nline++; + while((*nline == '\n') || (*nline == '\r')) nline++; + } + + /* compare key */ + if (strncmp(line, key, keylen) == 0) { + char *val; + + /* extract value */ + val = strchr(line, ':'); + if (val != NULL) { + val++; + while((*val == ' ') || (*val == '\t')) val++; + } + else + val = line; + + /* produce output */ + res = strclone(val); + goto found; + } + } + } + + found:; + if (stdo != NULL) + free(stdo); + free(cmd); + return res; +} diff --git a/scconfig/src/default/lib_try.c b/scconfig/src/default/lib_try.c new file mode 100644 index 0000000..da03685 --- /dev/null +++ b/scconfig/src/default/lib_try.c @@ -0,0 +1,410 @@ +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +/* Returns true if the first 2 characters of the output is OK */ +static int try_icl_accept_ok(char *stdout_str) +{ + return (strncmp(stdout_str, "OK", 2) == 0); +} + +#define is_ctrl_prefix(ch) (((ch) == '!') || ((ch) == '^')) + +static int try_icl__(int logdepth, const char *prefix, const char *test_c_in, const char *includes, const char *cflags, const char *ldflags, const char 
*db_includes, const char *db_cflags, const char *db_ldflags, int run, int (*accept_res)(char *stdout_str)) +{ + char *out = NULL; + char *tmp, *inc; + const char *test_c; + char c[1024]; + int l, compres; + + if (includes != NULL) { + l = strlen(includes); + memcpy(c, includes, l); + c[l] = '\n'; + l++; + strcpy(c+l, test_c_in); + test_c = c; + } + else + test_c = test_c_in; + + logprintf(logdepth, "trying '%s' and '%s' and '%s', %s\n", str_null(db_includes), str_null(db_cflags), str_null(db_ldflags), run ? "with a run" : "with no run"); + + if (run) + compres = compile_run(logdepth+1, test_c, NULL, cflags, ldflags, &out); + else { + char *fn_output = NULL; + compres = compile_code(logdepth+1, test_c, &fn_output, NULL, cflags, ldflags); + if (fn_output != NULL) { + unlink(fn_output); + free(fn_output); + } + } + + + if (compres == 0) { + if (!run || target_emu_fail(out) || accept_res(out)) { + free(out); + + /* no prefix: don't modify the database, the caller will do that */ + if (prefix == NULL) + return 1; + + tmp = malloc(strlen(prefix) + 32); + + if ((db_includes == NULL) || (*db_includes == '\0')) + inc = strclone(""); + else + inc = uniq_inc_str(db_includes, NULL, "\\n", 0, 0, NULL); + sprintf(tmp, "%s/includes", prefix); + put(tmp, inc); + + if (db_cflags == NULL) + db_cflags = ""; + + sprintf(tmp, "%s/cflags", prefix); + put(tmp, db_cflags); + + + if (db_ldflags == NULL) + db_ldflags = ""; + sprintf(tmp, "%s/ldflags", prefix); + put(tmp, db_ldflags); + + if (inc != NULL) { + report("OK ('%s', '%s' and '%s')\n", str_null(inc), str_null(db_cflags), str_null(db_ldflags)); + free(inc); + } + else + report("OK ('%s' and '%s')\n", str_null(db_cflags), str_null(db_ldflags)); + + sprintf(tmp, "%s/presents", prefix); + put(tmp, strue); + free(tmp); + return 1; + } + free(out); + } + return 0; +} + +#define LOAD(node) \ +do { \ + if (u ## node != NULL) break; \ + strcpy(apath_end, #node); \ + u ## node = get(apath); \ +} while(0) + +#define SET(dst, src, is_flag) \ 
+do { \ + char *__sep__ = is_flag ? " " : "\n"; \ + const char *__dst__ = dst == NULL ? "" : dst; \ + char *__out__; \ + if (is_ctrl_prefix(*__dst__)) __dst__++; \ + if (*src == '!') \ + __out__ = strclone(src+1); \ + else if (*src == '^') {\ + if (src[1] != '\0') \ + __out__ = str_concat("", src+1, __sep__, __dst__, NULL); \ + else \ + __out__ = strclone(__dst__); \ + } \ + else { \ + if (*__dst__ != '\0') \ + __out__ = str_concat("", __dst__, __sep__, src, NULL); \ + else \ + __out__ = strclone(src); \ + } \ + free(dst); \ + dst = __out__; \ + if (is_flag) { \ + char *__s__; \ + for(__s__ = dst; *__s__ != '\0'; __s__++) \ + if ((*__s__ == '\n') || (*__s__ == '\r')) \ + *__s__ = ' '; \ + } \ +} while(0) + + +/* Figure user overrides and call try_icl__() accordingly */ +int try_icl_(int logdepth, const char *prefix, const char *test_c_in, const char *includes, const char *cflags, const char *ldflags, int run, int (*accept_res)(char *stdout_str)) +{ + char apath[1024], *apath_end; + int l, res; + const char *uincludes = NULL, *ucflags = NULL, *uldflags = NULL, *uprefix = NULL; /* user specified */ + char *rincludes, *rcflags, *rldflags; /* real */ + char *dbincludes = NULL, *dbcflags = NULL, *dbldflags = NULL; /* what to add in the db at the end */ + + /* load uincludes, uclfags, uldflags and uprefix - LOAD() inserts the u */ + l = sprintf(apath, "/arg/icl/%s/", prefix); apath_end = apath+l; + LOAD(includes); + LOAD(cflags); + LOAD(ldflags); + LOAD(prefix); + l = sprintf(apath, "/arg/icl/%s/%s/", db_cwd, prefix); apath_end = apath+l; + LOAD(includes); + LOAD(cflags); + LOAD(ldflags); + LOAD(prefix); + + /* special case: all three specified by the user - ignore what the detector wanted, but run only once per node prefix */ + if ((uincludes != NULL) && (ucflags != NULL) && (uldflags != NULL)) { + const char *am; + sprintf(apath, "%s/icl/all_manual_result", prefix); + am = get(apath); + if (am != NULL) + return istrue(am); /* return cached result if available */ + res 
= try_icl__(logdepth, prefix, test_c_in, uincludes, ucflags, uldflags, uincludes, ucflags, uldflags, run, accept_res); + put(apath, res ? strue : sfalse); + return res; + } + + /* TODO: get default cflags here */ + rincludes = NULL; + rcflags = strclone(get("cc/cflags")); + rldflags = strclone(get("cc/ldflags")); + + /* override base/default values with detection requested ones */ + if (includes != NULL) SET(rincludes, includes, 0); + if (cflags != NULL) SET(rcflags, cflags, 1); + if (ldflags != NULL) SET(rldflags, ldflags, 1); + + if (includes != NULL) SET(dbincludes, includes, 0); + if (cflags != NULL) SET(dbcflags, cflags, 1); + if (ldflags != NULL) SET(dbldflags, ldflags, 1); + + /* override detection with user specified ICL values */ + if (uincludes != NULL) SET(rincludes, uincludes, 0); + if (ucflags != NULL) SET(rcflags, ucflags, 1); + if (uldflags != NULL) SET(rldflags, uldflags, 1); + + if (uincludes != NULL) SET(dbincludes, uincludes, 0); + if (ucflags != NULL) SET(dbcflags, ucflags, 1); + if (uldflags != NULL) SET(dbldflags, uldflags, 1); + + /* insert prefix as needed */ + if (uprefix != NULL) { + char *old, *prfx; + + old = rcflags; + if ((rcflags != NULL) && (*rcflags == '^')) { + rcflags++; + prfx = "^"; + } + else + prfx = ""; + rcflags = str_concat("", prfx, "-I", uprefix, "/include ", rcflags, NULL); + if (old != cflags) free(old); + + old = rldflags; + if ((rldflags != NULL) && (*rldflags == '^')) { + rldflags++; + prfx = "^"; + } + else + prfx = ""; + rldflags = str_concat("", prfx, "-L", uprefix, "/lib ", rldflags, NULL); + if (old != ldflags) free(old); + } + + res = try_icl__(logdepth, prefix, test_c_in, rincludes, rcflags, rldflags, dbincludes, dbcflags, dbldflags, run, accept_res); + + /* if we had to alloc, free here */ + free(rincludes); + free(rcflags); + free(rldflags); + free(dbincludes); + free(dbcflags); + free(dbldflags); + + return res; +} + +#undef LOAD +#undef SET + +int try_icl(int logdepth, const char *prefix, const char 
*test_c_in, const char *includes, const char *cflags, const char *ldflags) +{ + return try_icl_(logdepth, prefix, test_c_in, includes, cflags, ldflags, 1, try_icl_accept_ok); +} + +int try_icl_with_deps(int logdepth, const char *prefix, const char *test_c_in, const char *includes, const char *cflags, const char *ldflags, const char *dep_includes, const char *dep_cflags, const char *dep_ldflags, int run) +{ + int res; + + if ((dep_includes != NULL) && (*dep_includes == '\0')) dep_includes = NULL; + if ((dep_cflags != NULL) && (*dep_cflags == '\0')) dep_cflags = NULL; + if ((dep_ldflags != NULL) && (*dep_ldflags == '\0')) dep_ldflags = NULL; + + if (dep_includes != NULL) includes = str_concat(" ", dep_includes, includes, NULL); + if (dep_cflags != NULL) cflags = str_concat(" ", dep_cflags, cflags, NULL); + if (dep_ldflags != NULL) ldflags = str_concat(" ", dep_ldflags, ldflags, NULL); + + res = try_icl_(logdepth, prefix, test_c_in, includes, cflags, ldflags, run, try_icl_accept_ok); + + if (dep_includes != NULL) free((char *)includes); + if (dep_cflags != NULL) free((char *)cflags); + if (dep_ldflags != NULL) free((char *)ldflags); + + return res; +} + +int try_icl_norun(int logdepth, const char *prefix, const char *test_c_in, const char *includes, const char *cflags, const char *ldflags) +{ + return try_icl_(logdepth, prefix, test_c_in, includes, cflags, ldflags, 0, try_icl_accept_ok); +} + + +int try_fail(int logdepth, const char *prefix) +{ + char *tmp; + tmp = malloc(strlen(prefix) + 32); + sprintf(tmp, "%s/presents", prefix); + put(tmp, sfalse); + free(tmp); + report("not found\n"); + logprintf(logdepth, "NOT FOUND."); + return 1; +} + +static int try_pkg_config_(int logdepth, char *pkgname, const char *prefix, const char *test_c) +{ + char *cflags, *ldflags; + int res; + + logprintf(logdepth, "Trying pkg-config %s\n", pkgname); + if (run_pkg_config(logdepth+1, pkgname, &cflags, &ldflags) == 0) + res = try_icl(logdepth+1, prefix, test_c, NULL, cflags, ldflags); 
+ else + res = 0; + free(cflags); + free(ldflags); + + return res; +} + +int try_icl_pkg_config(int logdepth, const char *prefix, const char *test_c, char *includes, const char *pkgpat, const char *reqver) +{ + char **pkg_ver, **s; + int num_pkg_ver; + int res = 0; /* default: no listed package worked; every "goto out" path that does not set res = 1 returns this */ + (void) includes; /* not used */ + + run_pkg_config_lst(logdepth, pkgpat, &num_pkg_ver, &pkg_ver); + if (pkg_ver == NULL) + return 0; + + if (reqver != NULL) { + /* search the list for the preferred version */ + for(s = pkg_ver; *s != NULL; s+=2) { + if (strcmp(s[1], reqver) == 0) { + if (try_pkg_config_(logdepth, s[0], prefix, test_c)) { + res = 1; + report("Found version required (%s) using pkg_config.\n", reqver); + goto out; + } + else { + report("The version required (%s) is found (via pkg_config) but does not work\n", reqver); + goto out; + } + } + } + goto out; + } + + for(s = pkg_ver; *s != NULL; s+=2) { + if (try_pkg_config_(logdepth, s[0], prefix, test_c)) { + res = 1; + goto out; + } + } + +out:; + filelist_free(&num_pkg_ver, &pkg_ver); + return res; +} + + +int import_icl(const char *key, const char *fn) +{ + char path[1024]; + + switch(*key) { + case 'l': sprintf(path, "/arg/icl/%s/ldflags", key+8); break; + case 'c': sprintf(path, "/arg/icl/%s/cflags", key+7); break; + case 'i': sprintf(path, "/arg/icl/%s/includes", key+9); break; + case 'p': sprintf(path, "/arg/icl/%s/prefix", key+7); break; + default: + return 1; + } + printf("path='%s' fn='%s'\n", path, fn); + return put(path, fn) == NULL; +} + + +static long field_accept_len; +static int field_accept_res(char *stdout_str) +{ + char *end; + field_accept_len = strtol(stdout_str, &end, 10); + if (((*end == '\0') || (*end == '\r') || (*end == '\n')) && (field_accept_len > 0)) + return 1; + return 0; +} + +int try_icl_sfield(int logdepth, const char *prefix, const char *structn, const char *fieldn, const char *includes, const char *cflags, const char *ldflags) +{ + int res; + char test_c[512]; + char ls[16]; + const char *test_c_in = + NL 
"#include " + NL "int main()" + NL "{" + NL " %s s;" + NL " printf(\"%%ld\\n\", (long)sizeof(s.%s));" + NL "}" + NL; + + sprintf(test_c, test_c_in, structn, fieldn); + + res = try_icl_(logdepth, prefix, test_c, includes, cflags, ldflags, 1, field_accept_res); + if (res) { + sprintf(test_c, "%s/sizeof", prefix); + sprintf(ls, "%ld", field_accept_len); + put(test_c, ls); + } + return res; +} + +int try_icl_sfields(int logdepth, const char *prefix, const char *structn, const char **fields, const char *includes, const char *cflags, const char *ldflags, int silent_exit_first_fail) +{ + int succ = 0, first = 1; + require("cc/cc", logdepth, 1); + + for(; *fields != NULL; fields++) { + report("Checking for %s.%s... ", structn, *fields); + logprintf(logdepth, "%s: checking for field %s...\n", structn, *fields); + + logdepth++; + if (try_icl_sfield(logdepth, prefix, structn, *fields, includes, cflags, ldflags)) { + succ = 1; + } + else if ((silent_exit_first_fail) && (first)) { + return 1; + } + logdepth--; + first = 0; + } + + + if (!succ) + try_fail(logdepth, "libs/fsmount/next_dev"); + + return 0; +} diff --git a/scconfig/src/default/lib_uniqinc.c b/scconfig/src/default/lib_uniqinc.c new file mode 100644 index 0000000..c66aaf1 --- /dev/null +++ b/scconfig/src/default/lib_uniqinc.c @@ -0,0 +1,324 @@ +/* + scconfig - library for making includes on a list unique + Copyright (C) 2012, 2017 Tibor Palinkas + Copyright (C) 2017 Aron Barath + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include "libs.h" +#include "db.h" +#include "regex.h" + +#define grow \ + if (used >= alloced) { \ + alloced += 16; \ + list = realloc(list, alloced * sizeof(char *)); \ + } + +char **uniq_inc_arr(const char *includes, int indirect, const char *sep_, int *numlines) +{ + char *node, *next, *cw, *nw, *snode, *orig_node; + char *sep; + char **list; + int alloced, used, n; + + orig_node = strclone(includes); + node = orig_node; + if (sep_ == NULL) + sep = strclone("\r\n"); + else + sep = strclone(sep_); + + /* reset list */ + alloced = used = 0; + list = NULL; + /* take arguments one by one */ + while(node != NULL) { + if (indirect) { + while((*node == ' ') || (*node == '\t')) node++; + next = strpbrk(node, " \t"); + } + else { + for(;;) { + next = strpbrk(node, sep); + if ((next > node) || (next == NULL)) + break; + node = next+1; + } + } + if (next != NULL) { + *next = '\0'; + next++; + } + if (indirect) + snode = str_subsn(get(node)); + else + snode = node; + cw = snode; + /* split node value (s) by sep */ +/* fprintf(stderr, "nodename=%s snode=%s next=%s\n", node, snode, next);*/ + while(cw != NULL) { + nw = strpbrk(cw, sep); + if (nw != NULL) { + *nw = '\0'; + nw++; + } + + if (*cw != '\0') { + /* try to find cw in the existing list - this is a slow linear search for now */ + for(n = 0; n < used; n++) { + if (strcmp(list[n], cw) == 0) + goto already_on_list; + } + /* not found, append */ + grow; + list[used] = strclone(cw); + used++; + } + already_on_list:; + cw = nw; + } + if (indirect) + free(snode); + node = next; + } + grow; + list[used] = NULL; + if (numlines != NULL) + *numlines = used; + + free(orig_node); + free(sep); + 
+ return list; +} + +void uniq_inc_free(char **arr) +{ + char **s; + for(s = arr; *s != NULL; s++) + free(*s); + free(arr); +} + +static int uniq_inc_str_cmp(const void *a_, const void *b_) +{ + char **a = (char **)a_, **b = (char **)b_; + return strcmp(*a, *b); +} + +static void uniq_inc_assemble_normal(char* const ret, int numelem, char **arr, const char *osep, const int oseplen) +{ + char *end; + int len; + + for(end = ret; 0 < numelem; ++arr, --numelem) { + if (!*arr) + continue; + + len = strlen(*arr); + memcpy(end, *arr, len); + end += len; + memcpy(end, osep, oseplen); + end += oseplen; + free(*arr); + } + + *end = '\0'; +} + +static void uniq_inc_assemble_groups(char* const ret, int numelem, char **arr, const char *osep, const int oseplen, int eren, char **eres) +{ + char *end = ret; + int erei, ndx, len; + + /* re_comp() uses a global variable to store the compiler regex! */ + + for (erei = 0; erei < eren; ++erei) { + if (re_comp(eres[erei])) + abort(); + + for (ndx = 0; ndx < numelem; ++ndx) { + if (!arr[ndx]) + continue; + + if (re_exec(arr[ndx])) { + len = strlen(arr[ndx]); + memcpy(end, arr[ndx], len); + end += len; + memcpy(end, osep, oseplen); + end += oseplen; + free(arr[ndx]); + arr[ndx] = NULL; + } + } + } + + /* collect remaining elements */ + + uniq_inc_assemble_normal(end, numelem, arr, osep, oseplen); +} + +char *uniq_inc_str(const char *includes, const char *isep, const char *osep, int sort, int eren, char **eres) +{ + char **arr, **s, *ret; + int len, numelem, oseplen; + + /* split and uniq */ + + oseplen = strlen(osep); + arr = uniq_inc_arr(includes, 0, isep, NULL); + + /* calculate the required amount of memory */ + + len = 4; /* safety margin, for terminator \0, etc. 
*/ + numelem = 0; + for(s = arr; *s != NULL; s++) { + len += strlen(*s) + oseplen + 1; + numelem++; + } + + /* sort if needed */ + + if (sort) + qsort(arr, numelem, sizeof(char *), uniq_inc_str_cmp); + + /* allocate memory to assemble into */ + + ret = malloc(len); + + /* assemble the output */ + + if (0>=eren) + uniq_inc_assemble_normal(ret, numelem, arr, osep, oseplen); + else + uniq_inc_assemble_groups(ret, numelem, arr, osep, oseplen, eren, eres); + + /* done */ + + free(arr); + return ret; +} + +char *order_inc_str(const char *includes, const char *isep, const char *word1, int dir, const char *word2) +{ + const char *s, *next, *pre, *mid, *post; + char *out, *end; + long w1o = -1, w2o = -1; + long w1len = strlen(word1), w2len = strlen(word2), tlen; + long pre_len, mid_len, post_len; + + if (dir == 0) /* no direction given: nothing to build, return NULL */ + return NULL; + + if ((w1len == 0) || (w2len == 0)) + return strclone(includes); + + if ((w1len == w2len) && (strcmp(word1, word2) == 0)) + return strclone(includes); + + /* search the starting offset of the first occurrence of word1 and word2 */ + for(s = includes; (s != NULL) && ((w1o < 0) || (w2o < 0)); s = next) { + next = strpbrk(s, isep); + if (next == NULL) + tlen = strlen(s); + else + tlen = next-s; + + if ((w1o < 0) && (w1len == tlen) && (memcmp(s, word1, tlen) == 0)) + w1o = s - includes; + if ((w2o < 0) && (w2len == tlen) && (memcmp(s, word2, tlen) == 0)) + w2o = s - includes; + + if (next != NULL) + next += strspn(next, isep); + } + + /* at least one of the words is not on the list: nothing to reorder, return a copy */ + if ((w1o < 0) || (w2o < 0)) + return strclone(includes); + + /* both words are on the list and already in the requested order: return a copy */ + if (((dir < 0) && (w1o < w2o)) || ((dir > 0) && (w1o > w2o))) + return strclone(includes); + + /* split up the input at word1 and word2 */ + tlen = strlen(includes); + if (dir < 0) { /* input is: 'pre w2 mid w1 post', goal is moving w1 before w2 */ + pre = includes; + pre_len = w2o; + mid = includes + w2o + w2len + 1; + mid_len = 
(includes + w1o) - mid; + post = includes + w1o + w1len + 1; + post_len = (includes + tlen) - post + 1; + } + else { /* input is: 'pre w1 mid w2 post' goal is moving w1 after w2*/ + pre = includes; + pre_len = w1o; + mid = includes + w1o + w1len + 1; + mid_len = (includes + w2o) - mid; + post = includes + w2o + w2len + 1; + post_len = (includes + tlen) - post + 1; + } + + /* truncate trailing separator, if present */ + if ((pre_len > 0) && (strchr(isep, pre[pre_len-1]))) + pre_len--; + + if ((mid_len > 0) && (strchr(isep, mid[mid_len-1]))) + mid_len--; + + if ((post_len > 0) && (strchr(isep, post[post_len-1]))) /* index by post_len (was mid_len); post_len counts the terminating \0, which strchr() also matches, so it is dropped here */ + post_len--; + + /* allocate extra space for a trailing separator and/or \0 */ + end = out = malloc(tlen+2); + + /* build the string by appending the parts */ +#define append(str, len) \ + if (len > 0) { \ + memcpy(end, str, len); \ + end += len; \ + *end = *isep; \ + end++; \ + } + + append(pre, pre_len); + if (dir < 0) { + append(word1, w1len); + append(word2, w2len); + } + append(mid, mid_len); + if (dir > 0) { + append(word2, w2len); + append(word1, w1len); + } + append(post, post_len); + +#undef append + + /* replace the last separator with \0 or just add a \0 at the end */ + if ((end > out) && (strchr(isep, end[-1]))) + end[-1] = '\0'; + else + end[0] = '\0'; + return out; +} diff --git a/scconfig/src/default/libs.h b/scconfig/src/default/libs.h new file mode 100644 index 0000000..9d7ab72 --- /dev/null +++ b/scconfig/src/default/libs.h @@ -0,0 +1,161 @@ +#define NL "\n" + +/* main.c */ +extern int no_autodetect_sys; /* set this to 1 to suppress system and cross detection */ +extern int no_save_cache; /* set this to 1 to avoid saving config.cache */ + +/* lib_try.c: try to compile and run a test code; save results under prefix, if worked */ +/* include, compile-flags, link-flags; + NULL includes, cflags, *ldflags means don't put anything in the db; cflags + and ldflags may be prefixed with "+" to include standard flags; + the test code has to print "OK" 
if it worked. If prefix is NULL, do not + modify the db or announce the output, silently return 0 or 1. + Returns 1 if worked, 0 if not */ +int try_icl(int logdepth, const char *prefix, const char *test_c_in, const char *includes, const char *cflags, + const char *ldflags); + +/* same as try_icl(), but does not execute the code, only compiles. Useful + for test programs with undesirable side effects (e.g. gtk: would open a window) */ +int try_icl_norun(int logdepth, const char *prefix, const char *test_c_in, const char *includes, const char *cflags, + const char *ldflags); + +/* same as try_icl, but also insert flags picked up from deps (if not NULL); + useful for detecting features that depend on other detected features. + If run is 0, do not run the test program, compile only */ +int try_icl_with_deps(int logdepth, const char *prefix, const char *test_c_in, const char *includes, const char *cflags, const char *ldflags, const char *dep_includes, const char *dep_cflags, const char *dep_ldflags, int run); + +/* Low level function for the same, giving more control to the caller */ +int try_icl_(int logdepth, const char *prefix, const char *test_c_in, const char *includes, const char *cflags, const char *ldflags, int run, int (*accept_res)(char *stdout_str)); + +/* use try_icl() on a list of packages found by pkg-config. Stick to the version + required if reqver is non-NULL, else try them in the order pkg-config returned + them. 
*/ +int try_icl_pkg_config(int logdepth, const char *prefix, const char *test_c, char *includes, const char *pkgpat, + const char *reqver); + +/* call this when failed to find the feature (after multiple try_*() calls); + always returns 1 (so that return try_fail() does the Right Thing) */ +int try_fail(int logdepth, const char *prefix); + +/* Import an argument for controlling try_icl() */ +int import_icl(const char *key, const char *fn); + +/* Determine the sizeof() of a struct field; works the same way as try_icl() but + also sets prefix/sizeof */ +int try_icl_sfield(int logdepth, const char *prefix, const char *structn, const char *fieldn, const char *includes, const char *cflags, const char *ldflags); +int try_icl_sfields(int logdepth, const char *prefix, const char *structn, const char **fields, const char *includes, const char *cflags, const char *ldflags, int silent_exit_first_fail); + + +/* lib_compile.c */ +extern int cross_blind; /* 1 if crosscompiling is blind (no emulator to test with) */ + +char *shell_escape_dup(const char *in); /* strdup in and escape any special char for the shell */ + +int compile_file(int logdepth, const char *fn_input, char **fn_output, const char *cc, const char *cflags, const char *ldflags); +int compile_code(int logdepth, const char *testcode, char **fn_output, const char *cc, const char *cflags, const char *ldflags); + +/* same as above, but do not add cc/cflags and cc/ldflags */ +int compile_file_raw(int logdepth, const char *fn_input, char **fn_output, const char *cc, const char *cflags, const char *ldflags); +int compile_code_raw(int logdepth, const char *testcode, char **fn_output, const char *cc, const char *cflags, const char *ldflags); + +int run(int logdepth, const char *cmd_, char **stdout_saved); +int run_shell(int logdepth, const char *cmd_, char **stdout_saved); +int compile_run(int logdepth, const char *testcode, const char *cc, const char *cflags, const char *ldflags, + char **stdout_saved); +int run_script(int 
logdepth, const char *interpreter, const char *script, const char *suffix, char **out); + +/* lib_file.c */ +int file_size(const char *name); +char *tempdir_new(int logdepth, const char *suffix); +char *tempfile_new(const char *suffix); +char *tempfile_dump(const char *testcode, const char *suffix); +char *load_file(const char *name); +int is_dir(const char *path); +int is_file(const char *path); +int exists(const char *path); +int exists_in(const char *dir, const char *file); +char *file_name(const char *path); /* returns malloc'd buffer */ +char *dir_name(const char *path); /* returns malloc'd buffer */ +char *tempfile_new_noabort(const char *suffix); /* for internal use - returns NULL instead of aborting when temp file can not be created */ +int touch_file(const char *path); + +/* lib_filelist.c */ +void filelist(int logdepth, const char *dir, int *argc, char ***argv); +void filelist_free(int *argc, char ***argv); + +/* lib_pkg_config.c */ + +/** run pkg config on @pkgname: + - with `--cflags` if cflags is not NULL, storing the result in cflags (malloc()'d) + - with `--libs` if ldflags is not NULL, storing the result in ldflags (malloc()'d) + Returns 0 on success. +*/ +int run_pkg_config(int logdepth, const char *pkgname, char **cflags, char **ldflags); + +/** same as run_pkg_config(), but runs a generic config tool (e.g. gdconfig) + passed in confname */ +int run_gen_config(int logdepth, const char *confname, const char *pkgname, char **cflags, char **ldflags); + +int run_pkg_config_modversion(int logdepth, const char *pkgname, char **modversion); +int run_pkg_config_modversion_db(int logdepth, const char *node, const char *pkgname); + +/** run pkg-config --list-all and keep lines matching regex pkgpat. 
+ + argc/argv is a filelist output, each item pair is package name returned by + pkg_config (odd items are full package names, even items are suffixes: + pkgpath match removed) +*/ +void run_pkg_config_lst(int logdepth, const char *pkgpat, int *argc, char ***argv); + + +/* lib_uniqinc.c */ +char **uniq_inc_arr(const char *includes, int indirect, const char *sep, int *numlines); /* split includes by sep; includes is a list of nodes to get() if indirect is non-zero; return a NULL-terminated array of unique include strings and set *numlines if numlines is not NULL */ +void uniq_inc_free(char **arr); /* free an array returned by uniq_inc_arr() */ +char *uniq_inc_str(const char *includes, const char *isep, const char *osep, int sort, int eren, char **eres); /* take a long list of includes separated by isep and emit an uniq list separated by osep */ +char *order_inc_str(const char *includes, const char *isep, const char *word1, int dir, const char *word2); /* take a long list of includes separated by isep and emit a new list where word1 is moved before/after of word2 if dir < 0 or dir > 0 */ + +/* find_types.c */ +int find_types_something_t(const char *name, int logdepth, int fatal, const char* prefix, const char *typ, const char* define, const char *try_include); + +/* str.c */ +char *strclone(const char *str); +char *trim_left(char *str); +char *trim_right(char *str); +char *strip(char *str); +char *str_chr(char *str, char c); +char *str_rchr(char *str, char c); +char *str_subsn(const char *str); /* advanced strdup that also interprets \n */ +char *str_concat(const char *sep, ...); /* concat a list of strings into a newly allocated buffer, putting sep between them */ +char *esc_interpret(const char *str); +int chr_inset(char c, const char *set); /* returns whether c is in set */ + +/* srctree.c */ + +/* Run svn info on dir and extract the value for key; + key is case sensitive. The first match is returned or NULL if not found + or on error. 
*/ +char *svn_info(int logdepth, const char *dir, const char *key); + +#define isblind(root) ((strncmp((root), "/target", 7) == 0) && cross_blind) +#define istarget(root) (strncmp((root), "/target", 7) == 0) + +#define target_emu_fail(out) ((isblind(db_cwd)) && (out == NULL)) + +#define safeNULL(s) ((s) == NULL ? "(NULL)" : (s)) +#define str_null(s) ((s) == NULL ? "" : (s)) + +/* Test program helper: generate code that ensures a given FUNCT exists + and is a function; can be turned off by defining SCCONFIG_ACCEPT_IMPLICIT + at scconfig compilation time */ +/* Both the FUNCT1 and FUNCT2 arguments *must* be used exactly once! In some + cases FUNCT1 and FUNCT2 are format string parameters. We expect, however, + that both arguments will be substituted with the same value. */ +#ifdef SCCONFIG_ACCEPT_IMPLICIT +# define no_implicit(RET_TYPE, FUNCT1, FUNCT2) \ + "/* accept implicit (" FUNCT1 ", " FUNCT2 ") */\n" +#else +# define no_implicit(RET_TYPE, FUNCT1, FUNCT2) \ + "#ifndef " FUNCT1 "\n" \ + "{ " #RET_TYPE " (*tmp)() = " FUNCT2 "; if (tmp) {}}\n" \ + "#endif\n" +#endif diff --git a/scconfig/src/default/log.c b/scconfig/src/default/log.c new file mode 100644 index 0000000..cafc36f --- /dev/null +++ b/scconfig/src/default/log.c @@ -0,0 +1,132 @@ +/* + scconfig - logging + Copyright (C) 2009 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include "log.h" +#include +#include +#include +#include + +char *spaces = " "; +FILE *logfile = NULL; +char *fn_log = "config.log"; + +void log_init(void) +{ + if (fn_log != NULL) { + /* double open for truncate - for extreme portability, please do not "fix" */ + logfile = fopen(fn_log, "w"); + assert(logfile != NULL); + fclose(logfile); + logfile = fopen(fn_log, "a"); + assert(logfile != NULL); + } +} + +void log_uninit(void) +{ + if (logfile != NULL) + fclose(logfile); +} + +void logprintf(int depth, const char *format, ...) +{ + va_list ap; + va_start(ap, format); + + if (logfile != NULL) { + fprintf(logfile, "%s", logprefix(depth)); + vfprintf(logfile, format, ap); + fflush(logfile); + } + + va_end(ap); +} + +void error(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + + va_start(ap, format); + if (logfile != NULL) { + fprintf(logfile, "###error### "); + vfprintf(logfile, format, ap); + fflush(logfile); + } + va_end(ap); + +} + +void report(const char *format, ...) 
+{ + va_list ap; + va_start(ap, format); + vprintf(format, ap); + fflush(stdout); + va_end(ap); + + va_start(ap, format); + if (logfile != NULL) { + fprintf(logfile, "###report### "); + vfprintf(logfile, format, ap); + fflush(logfile); + } + va_end(ap); +} + +void log_merge(int logdepth, const char *fn) +{ + FILE *f; + char line[2048]; + int lines; + + if (logfile == NULL) + return; + + f = fopen(fn, "r"); + if (f == NULL) { + logprintf(logdepth, "scconfig error: couldn't open %s for merging.\n", fn); + return; + } + lines = 0; + while(!(feof(f))) { + *line = '\0'; + fgets(line, sizeof(line), f); + if (*line != '\0') { + if (lines == 0) + logprintf(logdepth, "========= output dump start ============\n"); + lines++; + logprintf(logdepth, "%s", line); + /* Make sure we have newline at the end of each line */ + if (line[strlen(line)-1] != '\n') + logprintf(0, "\n"); + } + } + if (lines == 0) + logprintf(logdepth, "========= empty stderr =================\n"); + else + logprintf(logdepth, "========= output dump end ==============\n"); + fclose(f); +} diff --git a/scconfig/src/default/log.h b/scconfig/src/default/log.h new file mode 100644 index 0000000..4d5e97f --- /dev/null +++ b/scconfig/src/default/log.h @@ -0,0 +1,19 @@ +#include +#include + +#define max_spaces 64 +extern char *spaces; + +#define logprefix(n) (((n) > max_spaces) ? 
spaces : (spaces+max_spaces-(n))) + +void logprintf(int depth, const char *format, ...); +void error(const char *format, ...); +void report(const char *format, ...); + +void log_merge(int logdepth, const char *fn); + +extern FILE *logfile; +extern void log_init(void); +void log_uninit(void); + +extern char *fn_log; diff --git a/scconfig/src/default/main.c b/scconfig/src/default/main.c new file mode 100644 index 0000000..16d7442 --- /dev/null +++ b/scconfig/src/default/main.c @@ -0,0 +1,111 @@ +/* + scconfig - test code for default and scripts + Copyright (C) 2009..2016 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "db.h" +#include "find.h" +#include "log.h" +#include "arg.h" +#include "dep.h" +#include "deps_default.h" +#include "libs.h" +#include "hooks.h" +#include "regex.h" +#include "main_custom_args.h" +#include "main_lib.h" + +void re_fail(char *s, char c) +{ + fprintf(stderr, "Regex error: %s [opcode %o]\n", s, c); + abort(); +} + +int no_autodetect_sys = 0; +int no_save_cache = 0; +int main(int argc, char *argv[]) +{ + int blind_save; + + if (main_init() != 0) + return 1; + + if (main_process_args(argc, argv) != 0) + return 1; + + if (!no_autodetect_sys) { + find_target("", 0, 1); + blind_save = cross_blind; + cross_blind = 0; + printf("--- Detecting host\n"); + + require("sys/name", 0, 1); + } + + if (hook_detect_host()) { + fprintf(stderr, "hook_detect_host failed, exiting\n"); + return 1; + } + + cross_blind = blind_save; + if (!no_autodetect_sys) { + if (!iscross) + printf("--- Detecting target (same as host)\n"); + else + printf("--- Detecting target (differs from host)\n"); + } + db_cd("/target"); + run_custom_reqs(); + + + if (hook_detect_target()) { + fprintf(stderr, "hook_detect_target failed, exiting\n"); + return 1; + } + +#ifdef RUNTIME + if (!no_autodetect_sys) { + if (!iscross) + printf("--- Detecting runtime (same as host)\n"); + else + printf("--- Detecting runtime (differs from host)\n"); + } + db_cd("/runtime"); + if (hook_detect_runtime()) { + fprintf(stderr, "hook_detect_runtime failed, exiting\n"); + return 1; + } +#endif + + if (hook_generate()) { + fprintf(stderr, "hook_generate failed, exiting\n"); + return 1; + } + + if (!no_save_cache) + export("config.cache", 1, "/"); + + main_uninit(); 
+ return 0; +} + diff --git a/scconfig/src/default/main_custom_args.c b/scconfig/src/default/main_custom_args.c new file mode 100644 index 0000000..54eccb9 --- /dev/null +++ b/scconfig/src/default/main_custom_args.c @@ -0,0 +1,49 @@ +/* + scconfig - default way to handle custom args (save them in an array) + Copyright (C) 2016 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "hooks.h" +#include "main_custom_args.h" + +char *custom_reqs[MAX_CUSTOM_REQS]; +int num_custom_reqs = 0; + +int custom_arg(const char *key, const char *value) +{ + if (hook_custom_arg(key, value)) + return 1; + if (strcmp(key, "detect") == 0) { + printf("Will detect: %s\n", value); + if (num_custom_reqs >= MAX_CUSTOM_REQS) { + report("Too many custom reqs from the command line, exiting\n"); + exit(1); + } + custom_reqs[num_custom_reqs] = strclone(value); + num_custom_reqs++; + return 1; + } + return 0; +} diff --git a/scconfig/src/default/main_custom_args.h b/scconfig/src/default/main_custom_args.h new file mode 100644 index 0000000..9f9c86b --- /dev/null +++ b/scconfig/src/default/main_custom_args.h @@ -0,0 +1,6 @@ +#define 
MAX_CUSTOM_REQS 32 +extern char *custom_reqs[MAX_CUSTOM_REQS]; +extern int num_custom_reqs; + +int custom_arg(const char *key, const char *value); + diff --git a/scconfig/src/default/main_lib.c b/scconfig/src/default/main_lib.c new file mode 100644 index 0000000..0d60fe7 --- /dev/null +++ b/scconfig/src/default/main_lib.c @@ -0,0 +1,204 @@ +/* + scconfig - helpers for a main() + Copyright (C) 2009..2016 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "db.h" +#include "find.h" +#include "log.h" +#include "arg.h" +#include "dep.h" +#include "deps_default.h" +#include "libs.h" +#include "hooks.h" +#include "regex.h" +#include "main_custom_args.h" + +#ifdef PLUGIN_SCRIPTS +#include "../scripts/INIT.h" +#endif + +#ifdef PLUGIN_PARSER +#include "../parser/INIT.h" +#endif + +#ifdef PLUGIN_C99 +#include "../c99/INIT.h" +#endif + +#ifdef PLUGIN_PARSGEN +#include "../parsgen/INIT.h" +#endif + +#ifdef PLUGIN_MATH +#include "../math/INIT.h" +#endif + +#ifdef PLUGIN_SOCKET +#include "../socket/INIT.h" +#endif + +#ifdef PLUGIN_USERPASS +#include "../userpass/INIT.h" +#endif + +#ifdef PLUGIN_GUI +#include "../gui/INIT.h" +#endif + +#ifdef PLUGIN_TTY +#include 
"../tty/INIT.h" +#endif + +#ifdef PLUGIN_SUL +#include "../sul/INIT.h" +#endif + +#ifdef PLUGIN_POSIX +#include "../posix/INIT.h" +#endif + +#ifdef PLUGIN_GENERATOR +#include "generator.h" +#endif + + +void init(void) +{ + db_init(); + log_init(); + dep_init(); + deps_default_init(); + + /* common internal directory */ + db_mkdir("/internal"); + + /* We have a host system for sure - also make it the default */ + db_mkdir("/host"); + db_cd("/host"); + + /* emulator for the host system is empty string */ + put("sys/emu", ""); + +#ifdef PLUGIN_SCRIPTS +#include "../scripts/INIT.c" +#endif + +#ifdef PLUGIN_PARSER +#include "../parser/INIT.c" +#endif + +#ifdef PLUGIN_C99 +#include "../c99/INIT.c" +#endif + +#ifdef PLUGIN_PARSGEN +#include "../parsgen/INIT.c" +#endif + +#ifdef PLUGIN_MATH +#include "../math/INIT.c" +#endif + +#ifdef PLUGIN_SOCKET +#include "../socket/INIT.c" +#endif + +#ifdef PLUGIN_USERPASS +#include "../userpass/INIT.c" +#endif + +#ifdef PLUGIN_GUI +#include "../gui/INIT.c" +#endif + +#ifdef PLUGIN_TTY +#include "../tty/INIT.c" +#endif + +#ifdef PLUGIN_SUL +#include "../sul/INIT.c" +#endif + +#ifdef PLUGIN_POSIX +#include "../posix/INIT.c" +#endif + +#ifdef PLUGIN_GENERATOR +#include "../generator/INIT.c" +#endif +} + +void uninit(void) +{ + log_uninit(); + dep_uninit(); + db_uninit(); +} + +void run_custom_reqs(void) +{ + int n; + if (num_custom_reqs > 0) { + printf("Running custom requirements\n"); + for(n = 0; n < num_custom_reqs; n++) { + if (custom_reqs[n] == NULL) { + fprintf(stderr, "Error: requested detection of empty string - please check your command line, syntax is --detect=node\n"); + exit(1); + } + require(custom_reqs[n], 1, 1); + } + } +} + +int main_init(void) +{ + re_modw("./\\"); + if (hook_preinit()) { + fprintf(stderr, "hook_preinit failed, exiting\n"); + return 1; + } + init(); + if (hook_postinit()) { + fprintf(stderr, "hook_postinit failed, exiting\n"); + return 1; + } + return 0; +} + +int main_process_args(int argc, char 
*argv[]) +{ + process_args(argc, argv); + if (hook_postarg()) { + fprintf(stderr, "hook_postarg failed, exiting\n"); + return 1; + } + return 0; +} + +void main_uninit(void) +{ + hook_preuninit(); + uninit(); + hook_postuninit(); +} diff --git a/scconfig/src/default/main_lib.h b/scconfig/src/default/main_lib.h new file mode 100644 index 0000000..1b5e5b3 --- /dev/null +++ b/scconfig/src/default/main_lib.h @@ -0,0 +1,8 @@ +int main_init(void); +int main_process_args(int argc, char *argv[]); +void main_uninit(void); + +/* internal */ +void init(void); +void uninit(void); +void run_custom_reqs(void); diff --git a/scconfig/src/default/regex.c b/scconfig/src/default/regex.c new file mode 100644 index 0000000..4292033 --- /dev/null +++ b/scconfig/src/default/regex.c @@ -0,0 +1,611 @@ +/* + + * regex - Regular expression pattern matching and replacement + * + * By: Ozan S. Yigit (oz) + * Dept. of Computer Science + * York University + * + * These routines are the PUBLIC DOMAIN equivalents of regex + * routines as found in 4.nBSD UN*X, with minor extensions. + * + * These routines are derived from various implementations found + * in software tools books, and Conroy's grep. They are NOT derived + * from licensed/restricted software. + * For more interesting/academic/complicated implementations, + * see Henry Spencer's regexp routines, or GNU Emacs pattern + * matching module. + * + * const correctness patch by Tibor 'Igor2' Palinkas in 2009..2010 + * new subs code by Tibor 'Igor2' Palinkas in 2015 + */ +#include +#include +#include "regex.h" + +#define MAXNFA 1024 +#define MAXTAG 10 + +#define OKP 1 +#define NOP 0 + +#define CHR 1 +#define ANY 2 +#define CCL 3 +#define BOL 4 +#define EOL 5 +#define BOT 6 +#define EOT 7 +#define BOW 8 +#define EOW 9 +#define REF 10 +#define CLO 11 + +#define END 0 + +/* + * The following defines are not meant to be changeable. + * They are for readability only. 
+ */ +#define MAXCHR 128 +#define CHRBIT 8 +#define BITBLK MAXCHR/CHRBIT +#define BLKIND 0170 +#define BITIND 07 + +#define ASCIIB 0177 + +#ifdef NO_UCHAR +typedef char CHAR; +#else +typedef unsigned char CHAR; +#endif + +static int tagstk[MAXTAG]; /* subpat tag stack..*/ +static CHAR nfa[MAXNFA]; /* automaton.. */ +static int sta = NOP; /* status of lastpat */ + +static CHAR bittab[BITBLK]; /* bit table for CCL */ + /* pre-set bits... */ +static CHAR bitarr[] = {1,2,4,8,16,32,64,128}; + +static void +chset(CHAR c) +{ + bittab[(CHAR) ((c) & BLKIND) >> 3] |= bitarr[(c) & BITIND]; +} + +#define badpat(x) (*nfa = END, x) +#define store(x) *mp++ = x + +char * +re_comp(const char *pat) +{ + register const char *p; /* pattern pointer */ + register CHAR *mp=nfa; /* nfa pointer */ + register CHAR *lp; /* saved pointer.. */ + register CHAR *sp=nfa; /* another one.. */ + + register int tagi = 0; /* tag stack index */ + register int tagc = 1; /* actual tag count */ + + register int n; + register CHAR mask; /* xor mask -CCL/NCL */ + int c1, c2; + + if (!pat || !*pat) { + if (sta) + return 0; + else + return badpat("No previous regular expression"); + } + sta = NOP; + + for (p = pat; *p; p++) { + lp = mp; + switch(*p) { + + case '.': /* match any char.. */ + store(ANY); + break; + + case '^': /* match beginning.. */ + if (p == pat) + store(BOL); + else { + store(CHR); + store(*p); + } + break; + + case '$': /* match endofline.. 
*/ + if (!*(p+1)) + store(EOL); + else { + store(CHR); + store(*p); + } + break; + + case '[': /* match char class..*/ + store(CCL); + + if (*++p == '^') { + mask = 0377; + p++; + } + else + mask = 0; + + if (*p == '-') /* real dash */ + chset(*p++); + if (*p == ']') /* real brac */ + chset(*p++); + while (*p && *p != ']') { + if (*p == '-' && *(p+1) && *(p+1) != ']') { + p++; + c1 = *(p-2) + 1; + c2 = *p++; + while (c1 <= c2) + chset((CHAR)c1++); + } +#ifdef EXTEND + else if (*p == '\\' && *(p+1)) { + p++; + chset(*p++); + } +#endif + else + chset(*p++); + } + if (!*p) + return badpat("Missing ]"); + + for (n = 0; n < BITBLK; bittab[n++] = (char) 0) + store(mask ^ bittab[n]); + + break; + + case '*': /* match 0 or more.. */ + case '+': /* match 1 or more.. */ + if (p == pat) + return badpat("Empty closure"); + lp = sp; /* previous opcode */ + if (*lp == CLO) /* equivalence.. */ + break; + switch(*lp) { + + case BOL: + case BOT: + case EOT: + case BOW: + case EOW: + case REF: + return badpat("Illegal closure"); + default: + break; + } + + if (*p == '+') + for (sp = mp; lp < sp; lp++) + store(*lp); + + store(END); + store(END); + sp = mp; + while (--mp > lp) + *mp = mp[-1]; + store(CLO); + mp = sp; + break; + + case '\\': /* tags, backrefs .. 
*/ + switch(*++p) { + + case '(': + if (tagc < MAXTAG) { + tagstk[++tagi] = tagc; + store(BOT); + store(tagc++); + } + else + return badpat("Too many \\(\\) pairs"); + break; + case ')': + if (*sp == BOT) + return badpat("Null pattern inside \\(\\)"); + if (tagi > 0) { + store(EOT); + store(tagstk[tagi--]); + } + else + return badpat("Unmatched \\)"); + break; + case '<': + store(BOW); + break; + case '>': + if (*sp == BOW) + return badpat("Null pattern inside \\<\\>"); + store(EOW); + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + n = *p-'0'; + if (tagi > 0 && tagstk[tagi] == n) + return badpat("Cyclical reference"); + if (tagc > n) { + store(REF); + store(n); + } + else + return badpat("Undetermined reference"); + break; +#ifdef EXTEND + case 'b': + store(CHR); + store('\b'); + break; + case 'n': + store(CHR); + store('\n'); + break; + case 'f': + store(CHR); + store('\f'); + break; + case 'r': + store(CHR); + store('\r'); + break; + case 't': + store(CHR); + store('\t'); + break; +#endif + default: + store(CHR); + store(*p); + } + break; + + default : /* an ordinary char */ + store(CHR); + store(*p); + break; + } + sp = lp; + } + if (tagi > 0) + return badpat("Unmatched \\("); + store(END); + sta = OKP; + return 0; +} + + +static const char *bol; +const char *bopat[MAXTAG]; +const char *eopat[MAXTAG]; +static const char *pmatch(const char *, CHAR *, int *); + +/* + * re_exec: + * execute nfa to find a match. + * + * special cases: (nfa[0]) + * BOL + * Match only once, starting from the + * beginning. + * CHR + * First locate the character without + * calling pmatch, and if found, call + * pmatch for the remaining string. + * END + * re_comp failed, poor luser did not + * check for it. Fail fast. + * + * If a match is found, bopat[0] and eopat[0] are set + * to the beginning and the end of the matched fragment, + * respectively. 
+ * + */ + +int +re_exec(const char *lp) +{ + register CHAR c; + register const char *ep = 0; + register CHAR *ap = nfa; + int score = 1; + + bol = lp; + + bopat[0] = 0; + bopat[1] = 0; + bopat[2] = 0; + bopat[3] = 0; + bopat[4] = 0; + bopat[5] = 0; + bopat[6] = 0; + bopat[7] = 0; + bopat[8] = 0; + bopat[9] = 0; + + switch(*ap) { + + case BOL: /* anchored: match from BOL only */ + ep = pmatch(lp,ap, &score); + break; + case CHR: /* ordinary char: locate it fast */ + c = *(ap+1); + while (*lp && *lp != c) + lp++; + if (!*lp) /* if EOS, fail, else fall thru. */ + return 0; + default: /* regular matching all the way. */ +#ifdef OLD + while (*lp) { + if ((ep = pmatch(lp,ap, &score))) + break; + lp++; + } +#else /* match null string */ + do { + if ((ep = pmatch(lp,ap, &score))) + break; + } while (*lp++); +#endif + break; + case END: /* munged automaton. fail always */ + return 0; + } + if (!ep) + return 0; + + bopat[0] = lp; + eopat[0] = ep; + return score; +} + +/* + * pmatch: internal routine for the hard part + * + * This code is partly snarfed from an early grep written by + * David Conroy. The backref and tag stuff, and various other + * innovations are by oz. + * + * special case optimizations: (nfa[n], nfa[n+1]) + * CLO ANY + * We KNOW .* will match everything up to the + * end of line. Thus, directly go to the end of + * line, without recursive pmatch calls. As in + * the other closure cases, the remaining pattern + * must be matched by moving backwards on the + * string recursively, to find a match for xy + * (x is ".*" and y is the remaining pattern) + * where the match satisfies the LONGEST match for + * x followed by a match for y. + * CLO CHR + * We can again scan the string forward for the + * single char and at the point of failure, we + * execute the remaining nfa recursively, same as + * above. 
+ * + * At the end of a successful match, bopat[n] and eopat[n] + * are set to the beginning and end of subpatterns matched + * by tagged expressions (n = 1 to 9). + * + */ + +#ifndef re_fail +extern void re_fail(char *, unsigned char); +#endif + +/* + * character classification table for word boundary operators BOW + * and EOW. the reason for not using ctype macros is that we can + * let the user add into our own table. see re_modw. This table + * is not in the bitset form, since we may wish to extend it in the + * future for other character classifications. + * + * TRUE for 0-9 A-Z a-z _ + */ +static CHAR chrtyp[MAXCHR] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 0, 0 + }; + +#define inascii(x) (0177&(x)) +#define iswordc(x) chrtyp[inascii(x)] +#define isinset(x,y) ((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND]) + +/* + * skip values for CLO XXX to skip past the closure + */ + +#define ANYSKIP 2 /* [CLO] ANY END ... */ +#define CHRSKIP 3 /* [CLO] CHR chr END ... */ +#define CCLSKIP 18 /* [CLO] CCL 16bytes END ... */ + +static const char * +pmatch(const char *lp, CHAR *ap, int *score) +{ + register int op, c, n; + register const char *e; /* extra pointer for CLO */ + register const char *bp; /* beginning of subpat.. */ + register const char *ep; /* ending of subpat.. */ + const char *are; /* to save the line ptr. 
*/ + + while ((op = *ap++) != END) + switch(op) { + + case CHR: + if (*lp++ != *ap++) + return 0; + (*score) += 100; + break; + case ANY: + if (!*lp++) + return 0; + (*score)++; + break; + case CCL: + c = *lp++; + if (!isinset(ap,c)) + return 0; + ap += BITBLK; + (*score) += 2; + break; + case BOL: + if (lp != bol) + return 0; + (*score) += 10; + break; + case EOL: + if (*lp) + return 0; + (*score) += 10; + break; + case BOT: + bopat[*ap++] = lp; + break; + case EOT: + eopat[*ap++] = lp; + break; + case BOW: + if ((lp!=bol && iswordc(lp[-1])) || !iswordc(*lp)) + return 0; + (*score) += 5; + break; + case EOW: + if (lp==bol || !iswordc(lp[-1]) || iswordc(*lp)) + return 0; + (*score) += 5; + break; + case REF: + n = *ap++; + bp = bopat[n]; + ep = eopat[n]; + while (bp < ep) { + if (*bp++ != *lp++) + return 0; + (*score) += 2; + } + break; + case CLO: + are = lp; + switch(*ap) { + + case ANY: + while (*lp) + lp++; + n = ANYSKIP; + (*score)++; + break; + case CHR: + c = *(ap+1); + while (*lp && c == *lp) + lp++; + n = CHRSKIP; + (*score) += 100; + break; + case CCL: + while ((c = *lp) && isinset(ap+1,c)) + lp++; + n = CCLSKIP; + (*score) += 2; + break; + default: + re_fail("closure: bad nfa.", *ap); + return 0; + } + + ap += n; + + while (lp >= are) { + e = pmatch(lp, ap, score); + if (e) + return e; + --lp; + } + return 0; + default: + re_fail("re_exec: bad nfa.", op); + return 0; + } + return lp; +} + +/* + * re_modw: + * add new characters into the word table to change re_exec's + * understanding of what a word should look like. Note that we + * only accept additions into the word definition. + * + * If the string parameter is 0 or null string, the table is + * reset back to the default containing A-Z a-z 0-9 _. 
[We use + * the compact bitset representation for the default table] + */ + +static CHAR deftab[16] = { + 0, 0, 0, 0, 0, 0, 0377, 003, 0376, 0377, 0377, 0207, + 0376, 0377, 0377, 007 +}; + +void +re_modw(char *s) +{ + register int i; + + if (!s || !*s) { + for (i = 0; i < MAXCHR; i++) + if (!isinset(deftab,i)) + iswordc(i) = 0; + } + else + while(*s) + iswordc(*s++) = 1; +} + +/* Substitute the matching part in the last re_exec call with sub. The + result is returned in a newly allocated string. */ +char *re_subs_dup(char *sub) +{ + char *dst; + const char *end; + int l1, l2, l3; + end = bol + strlen(bol); + l1 = bopat[0] - bol; + if (sub != NULL) + l2 = strlen(sub); + else + l2 = 0; + l3 = end - eopat[0]; + if (l3 < 0) + l3 = 0; + dst = malloc(l1+l2+l3+1); + memcpy(dst, bol, l1); + if (l2 != 0) + memcpy(dst+l1, sub, l2); + memcpy(dst+l1+l2, eopat[0], l3+1); + return dst; +} diff --git a/scconfig/src/default/regex.h b/scconfig/src/default/regex.h new file mode 100644 index 0000000..03830a0 --- /dev/null +++ b/scconfig/src/default/regex.h @@ -0,0 +1,14 @@ +#ifndef REGEX_H +#define REGEX_H + +extern const char *bopat[]; +extern const char *eopat[]; + + +extern char *re_comp(const char *); +extern int re_exec(const char *); +extern void re_modw(char *); +char *re_subs_dup(char *sub); + + +#endif /* REGEX_H */ diff --git a/scconfig/src/default/str.c b/scconfig/src/default/str.c new file mode 100644 index 0000000..b2b2034 --- /dev/null +++ b/scconfig/src/default/str.c @@ -0,0 +1,183 @@ +/* + scconfig - non-standard string manipulation routines + Copyright (C) 2009 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include + +char *strclone(const char *str) +{ + int l; + char *ret; + + if (str == NULL) + return NULL; + + l = strlen(str)+1; + ret = malloc(l); + memcpy(ret, str, l); + return ret; +} + +#define SPACE(c) (((c) == '\r') || ((c) == '\n') || ((c) == '\t') || ((c) == ' ')) + +char *trim_left(char *str) +{ + while(SPACE(*str)) str++; + return str; +} + +char *trim_right(char *str) +{ + char *end; + + end = str + strlen(str) - 1; + while((end >= str) && SPACE(*end)) { *end = '\0'; end--; } + return str; +} + +char *strip(char *str) +{ + return trim_left(trim_right(str)); +} + +char *str_chr(char *str, char c) +{ + char *s; + + for(s = str; *s != '\0'; s++) + if (*s == c) + return s; + return NULL; +} + +char *str_rchr(char *str, char c) +{ + char *s, *last; + + last = NULL; + for(s = str; *s != '\0'; s++) + if (*s == c) + last = s; + return last; +} + +char *str_subsn(const char *str) +{ + char *out, *o; + const char *i; + if (str == NULL) + return strclone(""); + o = out = malloc(strlen(str)+1); + for(i = str; *i != '\0'; i++, o++) { + if ((i[0] == '\\') && (i[1] == 'n')) { + i++; + *o = '\n'; + } + else + *o = *i; + } + *o = '\0'; + return out; +} + +char *str_concat(const char *sep, ...) 
+{ +# define CONCAT_MAX 64 + int len[CONCAT_MAX]; + const char *str[CONCAT_MAX]; + int n, v, sum, sl; + char *out, *o; + va_list ap; + va_start(ap, sep); + + if (sep == NULL) + sep = ""; + + /* load all strings into an array, measure their lengths */ + sum = 0; + for(v = 0; ;v++) { + if (v >= CONCAT_MAX) { + fprintf(stderr, "Internal error: str_concat got more strings than CONCAT_MAX\n"); + abort(); + } + str[v] = va_arg(ap, const char *); + if (str[v] == NULL) { + len[v] = 0; + break; + } + len[v] = strlen(str[v]); + sum += len[v]; + } + + sl = strlen(sep); + sum += (v-1) * sl + 1; /* + a sep between each two strings and a terminator at the end */ + o = out = malloc(sum); + for(n = 0; n < v; n++) { + if ((n > 0) && (sl > 0)) { + memcpy(o, sep, sl); + o += sl; + } + if (len[n] > 0) { + memcpy(o, str[n], len[n]); + o += len[n]; + } + } + *o = '\0'; + va_end(ap); + return out; +} + +char *esc_interpret(const char *str) +{ + char *out, *si, *so; + + out = strclone(str); + /* replace (interpret) \ sequences in seq */ + for(si = so = out; *si != '\0'; si++,so++) { + if (si[0] == '\\') { + switch(si[1]) { + case 'n': *so = '\n'; break; + case 't': *so = '\t'; break; + case 's': *so = ' '; break; + case '\\': *so = '\\'; break; + } + si++; + } + else + *so = *si; + } + *so = '\0'; + return out; +} + +int chr_inset(char c, const char *set) +{ + while (*set != '\0') { + if (c == *set) + return 1; + set++; + } + return 0; +} + diff --git a/scconfig/src/math/INIT.c b/scconfig/src/math/INIT.c new file mode 100644 index 0000000..b4f86f1 --- /dev/null +++ b/scconfig/src/math/INIT.c @@ -0,0 +1,2 @@ + deps_math_init(); + diff --git a/scconfig/src/math/INIT.h b/scconfig/src/math/INIT.h new file mode 100644 index 0000000..4deb09a --- /dev/null +++ b/scconfig/src/math/INIT.h @@ -0,0 +1 @@ +void deps_math_init(); diff --git a/scconfig/src/math/Makefile.plugin b/scconfig/src/math/Makefile.plugin new file mode 100644 index 0000000..c50eb14 --- /dev/null +++ 
b/scconfig/src/math/Makefile.plugin @@ -0,0 +1,18 @@ +MATH_CFLAGS = -DPLUGIN_MATH +MATH_OBJS = \ + $(BIN)/math/find_math.o \ + $(BIN)/math/find_fpenan.o \ + $(BIN)/math/find_func.o \ + $(BIN)/math/find_mfunc_cc.o + +$(BIN)/math/find_math.o: $(SRC)/math/find_math.c + $(CC) $(CFLAGS) -c $(SRC)/math/find_math.c -o $(BIN)/math/find_math.o + +$(BIN)/math/find_fpenan.o: $(SRC)/math/find_fpenan.c + $(CC) $(CFLAGS) -c $(SRC)/math/find_fpenan.c -o $(BIN)/math/find_fpenan.o + +$(BIN)/math/find_func.o: $(SRC)/math/find_func.c + $(CC) $(CFLAGS) -c $(SRC)/math/find_func.c -o $(BIN)/math/find_func.o + +$(BIN)/math/find_mfunc_cc.o: $(SRC)/math/find_mfunc_cc.c + $(CC) $(CFLAGS) -c $(SRC)/math/find_mfunc_cc.c -o $(BIN)/math/find_mfunc_cc.o diff --git a/scconfig/src/math/find_fpenan.c b/scconfig/src/math/find_fpenan.c new file mode 100644 index 0000000..064f4ac --- /dev/null +++ b/scconfig/src/math/find_fpenan.c @@ -0,0 +1,279 @@ +/* + scconfig - math feature detection + Copyright (C) 2013 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" +#include "find_mfunc_cc.h" + +#define MATHH "#include " +int find_math_isnan(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "int main() {" + NL " if (!isnan(1.0))" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for isnan... "); + logprintf(logdepth, "find_math_isnan: trying to find isnan...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/math/isnan", test_c, MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/isnan", test_c, MATHH, NULL, "-lm")) return 0; + if (try_icl(logdepth, "libs/math/isnan", test_c, "#define _BSD_SOURCE\n" MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/isnan", test_c, "#define _BSD_SOURCE\n" MATHH, NULL, "-lm")) return 0; + if (try_icl(logdepth, "libs/math/isnan", test_c, "#define _XOPEN_SOURCE\n" MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/isnan", test_c, "#define _XOPEN_SOURCE\n" MATHH, NULL, "-lm")) return 0; + + return try_fail(logdepth, "libs/math/isnan"); +} + +int find_math_isinf(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "int main() {" + NL " if (!isinf(1.0))" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for isinf... 
"); + logprintf(logdepth, "find_math_isinf: try_icling to find isinf...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/math/isinf", test_c, MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/isinf", test_c, MATHH, NULL, "-lm")) return 0; + if (try_icl(logdepth, "libs/math/isinf", test_c, "#define _BSD_SOURCE\n" MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/isinf", test_c, "#define _BSD_SOURCE\n" MATHH, NULL, "-lm")) return 0; + if (try_icl(logdepth, "libs/math/isinf", test_c, "#define _XOPEN_SOURCE\n" MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/isinf", test_c, "#define _XOPEN_SOURCE\n" MATHH, NULL, "-lm")) return 0; + + return try_fail(logdepth, "libs/math/isinf"); +} + + +int find_math_isfinite(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "int main() {" + NL " if (isfinite(1.0))" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for isfinite... 
"); + logprintf(logdepth, "find_math_isfinite: try_icling to find isfinite...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/math/isfinite", test_c, MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/isfinite", test_c, MATHH, NULL, "-lm")) return 0; + if (try_icl(logdepth, "libs/math/isfinite", test_c, "#define _BSD_SOURCE\n" MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/isfinite", test_c, "#define _BSD_SOURCE\n" MATHH, NULL, "-lm")) return 0; + if (try_icl(logdepth, "libs/math/isfinite", test_c, "#define _XOPEN_SOURCE\n" MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/isfinite", test_c, "#define _XOPEN_SOURCE\n" MATHH, NULL, "-lm")) return 0; + + return try_fail(logdepth, "libs/math/isfinite"); +} + + +int find_math_isnormal(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "int main() {" + NL " if (isnormal(1.0))" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for isnormal... 
"); + logprintf(logdepth, "find_math_isnormal: try_icling to find isnormal...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/math/isnormal", test_c, MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/isnormal", test_c, MATHH, NULL, "-lm")) return 0; + if (try_icl(logdepth, "libs/math/isnormal", test_c, "#define _BSD_SOURCE\n" MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/isnormal", test_c, "#define _BSD_SOURCE\n" MATHH, NULL, "-lm")) return 0; + if (try_icl(logdepth, "libs/math/isnormal", test_c, "#define _XOPEN_SOURCE\n" MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/isnormal", test_c, "#define _XOPEN_SOURCE\n" MATHH, NULL, "-lm")) return 0; + if (try_icl(logdepth, "libs/math/isnormal", test_c, "#define _ISOC99_SOURCE\n" MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/isnormal", test_c, "#define _ISOC99_SOURCE\n" MATHH, NULL, "-lm")) return 0; + if (try_icl(logdepth, "libs/math/isnormal", test_c, "#define _XOPEN_SOURCE 600\n" MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/isnormal", test_c, "#define _XOPEN_SOURCE 600\n" MATHH, NULL, "-lm")) return 0; + + return try_fail(logdepth, "libs/math/isnormal"); +} + + +int find_math_nan(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "int main() {" + NL " if (nan(\"foo\") != 0.0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for nan... 
"); + logprintf(logdepth, "find_math_nan: try_icling to find nan...\n"); + logdepth++; + + if (try_icl(logdepth, "libs/math/nan", test_c, MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/nan", test_c, MATHH, NULL, "-lm")) return 0; + if (try_icl(logdepth, "libs/math/nan", test_c, "#define _ISOC99_SOURCE\n" MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/nan", test_c, "#define _ISOC99_SOURCE\n" MATHH, NULL, "-lm")) return 0; + if (try_icl(logdepth, "libs/math/nan", test_c, "#define _XOPEN_SOURCE 600\n" MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, "libs/math/nan", test_c, "#define _XOPEN_SOURCE 600\n" MATHH, NULL, "-lm")) return 0; + + return try_fail(logdepth, "libs/math/nan"); +} + + +int find_math_nanop(const char *name, int logdepth, int fatal) +{ + char *test_c_temp = + NL "#include " + NL "#include " + NL "%s" + NL + NL "double s2d(const char *s)" + NL "{" + NL " if (strcmp(s, \"nan\") == 0)" + NL " return nan(\"nan\");" + NL " return atof(s);" + NL "}" + NL "int main(int argc, char *argv[])" + NL "{" + NL " double op1, op2, res;" + NL " op1 = s2d(argv[1]);" + NL " op2 = s2d(argv[3]);" + + NL " switch(*argv[2]) {" + NL " case '+': res = op1 + op2; break;" + NL " case '-': res = op1 - op2; break;" + NL " case 'M': res = op1 * op2; break;" + NL " case '/': res = op1 / op2; break;" + NL " }" + NL " if (isnan(res))" + NL " printf(\"nan\\n\");" + NL " else" + NL " printf(\"%%f\\n\", res);" + NL " return 0;" + NL "}" + NL; + char test_c[2048]; + char *inc, *test_bin, *test_bin_esc, *out, *s, *cmd = test_c; + char *tests[] = { + "%s 1 + 1", "2.0", NULL, + "%s nan + 1", "nan", "add", + "%s nan - 1", "nan", "sub", + "%s nan M 1", "nan", "multiply", + "%s nan / 1", "nan", "divide", + NULL, NULL + }; + char **test; + int bad = 0; + + require("libs/math/nan/presents", logdepth, fatal); + require("libs/math/isnan/presents", logdepth, fatal); + require("cc/cc", logdepth, fatal); + if (!istrue(get("libs/math/isnan/presents")) || 
!istrue(get("libs/math/nan/presents"))) + return try_fail(logdepth, "libs/math/nanop/presents"); + + inc = esc_interpret(get("libs/math/isnan/includes")); + sprintf(test_c, test_c_temp, inc); + free(inc); + + report("Checking for nanop... "); + logprintf(logdepth, "find_math_nan: try_icling to find nan...\n"); + logdepth++; + + test_bin = NULL; + if (compile_code(logdepth, test_c, &test_bin, NULL, get("libs/math/nan/cflags"), get("libs/math/nan/ldflags")) != 0) + return try_fail(logdepth, "libs/math/nanop/presents"); + report("found\n"); + + test_bin_esc = shell_escape_dup(test_bin); + for(test = tests; *test != NULL; test += 3) { + report(test[0], ""); + report("... "); + sprintf(cmd, test[0], test_bin_esc); + run(logdepth, cmd, &out); + if (test[2] != NULL) + sprintf(cmd, "libs/math/nanop/%s", test[2]); + if (out != NULL) { + if (target_emu_fail(out) || (test[1] == NULL) || (strncmp(out, test[1], strlen(test[1])) == 0)) { + report("OK (%s)\n", test[1]); + if (test[2] != NULL) + put(cmd, test[1]); + } + else { + report("? 
("); + for(s = out; *s != '\0'; s++) + if ((*s == '\n') || (*s == '\r')) + *s = ' '; + report(out); + report(")\n"); + if (test[2] != NULL) + put(cmd, out); + bad = 1; + } + free(out); + } + else + bad = 1; + } + + if (bad) + put("libs/math/nanop/allok", sfalse); + else + put("libs/math/nanop/allok", strue); + + put("libs/math/nanop/presents", strue); + + unlink(test_bin); + free(test_bin); + free(test_bin_esc); + return 0; +} + diff --git a/scconfig/src/math/find_fpenan.h b/scconfig/src/math/find_fpenan.h new file mode 100644 index 0000000..75e095e --- /dev/null +++ b/scconfig/src/math/find_fpenan.h @@ -0,0 +1,7 @@ +int find_math_isnan(const char *name, int logdepth, int fatal); +int find_math_isinf(const char *name, int logdepth, int fatal); +int find_math_isfinite(const char *name, int logdepth, int fatal); +int find_math_isnormal(const char *name, int logdepth, int fatal); +int find_math_nan(const char *name, int logdepth, int fatal); +int find_math_nanop(const char *name, int logdepth, int fatal); + diff --git a/scconfig/src/math/find_func.c b/scconfig/src/math/find_func.c new file mode 100644 index 0000000..40c60a9 --- /dev/null +++ b/scconfig/src/math/find_func.c @@ -0,0 +1,86 @@ +/* + scconfig - math feature detection + Copyright (C) 2015 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +#define MATHH "#include " + +static int test_mathf(const char *name, int logdepth, int fatal, const char *fname, const char *cond) +{ + char *test_c_template = + NL "float one=1.0, zero=0.0;" + NL "int main() {" + NL " if (%s)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + char test_c[512]; + char node_name[128]; + + sprintf(test_c, test_c_template, cond); + sprintf(node_name, "libs/math/%s", fname); + + require("cc/cc", logdepth, fatal); + + report("Checking for %s... ", fname); + logprintf(logdepth, "test_mathf: trying to find %s...\n",fname); + logdepth++; + + + if (try_icl(logdepth, node_name, test_c, MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, node_name, test_c, MATHH, NULL, "-lm")) return 0; + if (try_icl(logdepth, node_name, test_c, "#define _BSD_SOURCE\n" MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, node_name, test_c, "#define _BSD_SOURCE\n" MATHH, NULL, "-lm")) return 0; + if (try_icl(logdepth, node_name, test_c, "#define _XOPEN_SOURCE\n" MATHH, NULL, NULL)) return 0; + if (try_icl(logdepth, node_name, test_c, "#define _XOPEN_SOURCE\n" MATHH, NULL, "-lm")) return 0; + + return try_fail(logdepth, node_name); +} + +int find_math_expf(const char *name, int logdepth, int fatal) +{ + return test_mathf(name, logdepth, fatal, "expf", "expf(zero) == 1.0"); +} + + +int find_math_logf(const char *name, int logdepth, int fatal) +{ + return test_mathf(name, logdepth, fatal, "logf", "logf(one) == 0.0"); +} + +int find_math_rint(const char *name, int logdepth, int fatal) +{ + return test_mathf(name, logdepth, fatal, 
"rint", "rint(4.0) == 4.0"); +} + +int find_math_round(const char *name, int logdepth, int fatal) +{ + return test_mathf(name, logdepth, fatal, "round", "round(3.6) == 4.0"); +} diff --git a/scconfig/src/math/find_func.h b/scconfig/src/math/find_func.h new file mode 100644 index 0000000..67c879f --- /dev/null +++ b/scconfig/src/math/find_func.h @@ -0,0 +1,4 @@ +int find_math_expf(const char *name, int logdepth, int fatal); +int find_math_logf(const char *name, int logdepth, int fatal); +int find_math_rint(const char *name, int logdepth, int fatal); +int find_math_round(const char *name, int logdepth, int fatal); diff --git a/scconfig/src/math/find_math.c b/scconfig/src/math/find_math.c new file mode 100644 index 0000000..ac0ba5d --- /dev/null +++ b/scconfig/src/math/find_math.c @@ -0,0 +1,73 @@ +/* + scconfig - math feature detection + Copyright (C) 2009 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" +#include "find_func.h" +#include "find_mfunc_cc.h" +#include "find_fpenan.h" + +int find_math_minpack(const char *name, int logdepth, int fatal) +{ + char *test_c = + NL "int main() {" + NL " int one=1;" + NL " if (dpmpar_(&one) != 0.0)" + NL " puts(\"OK\");" + NL " return 0;" + NL "}" + NL; + + require("cc/cc", logdepth, fatal); + + report("Checking for minpack... "); + logprintf(logdepth, "find_math_minpack: trying to find minpack...\n"); + logdepth++; + + /* Look at the standard places */ + if (try_icl(logdepth, "libs/math/minpack", test_c, "#include ", NULL, "-lminpack")) return 0; + + return try_fail(logdepth, "libs/math/minpack"); +} + + +void deps_math_init() +{ + dep_add("libs/math/minpack/*", find_math_minpack); + dep_add("libs/math/cc/log/*", find_math_cc_log); + dep_add("libs/math/isnan/*", find_math_isnan); + dep_add("libs/math/isinf/*", find_math_isinf); + dep_add("libs/math/isfinite/*", find_math_isfinite); + dep_add("libs/math/isnormal/*", find_math_isnormal); + dep_add("libs/math/nan/*", find_math_nan); + dep_add("libs/math/nanop/*", find_math_nanop); + + dep_add("libs/math/expf/*", find_math_expf); + dep_add("libs/math/logf/*", find_math_logf); + dep_add("libs/math/rint/*", find_math_rint); + dep_add("libs/math/round/*", find_math_round); +} diff --git a/scconfig/src/math/find_mfunc_cc.c b/scconfig/src/math/find_mfunc_cc.c new file mode 100644 index 0000000..911385a --- /dev/null +++ b/scconfig/src/math/find_mfunc_cc.c @@ -0,0 +1,159 @@ +/* + scconfig - math func corner case detection + Copyright (C) 2012 Tibor Palinkas + + This 
library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +static char *test_c_templ = + NL "#include " + NL "#include " + NL "#include " + NL "int main(int argc, char *argv[]) {" + NL " double magic[] = { %s };" + NL " errno = 0;\n" + NL " printf(\"%%f:%%d\\n\", %s(magic[atoi(argv[1])]), errno);" + NL " return 0;" + NL "}" + NL; + +/* returns 0 if couldn't run (emu), -1 on error and 1 when got enough results */ +static int try(int logdepth, const char *func, const char *values, char **res, int num_res) +{ + char *test_c, *fn = NULL, *fn_esc, *cmd, *out; + int n, len; + + len = strlen(test_c_templ) + strlen(func) + strlen(values) + 1; + test_c = malloc(len); + sprintf(test_c, test_c_templ, values, func); + + logprintf(logdepth, "testing '%s' on '%s'\n", func, values); + + if (compile_code(logdepth+1, test_c, &fn, NULL, NULL, "-lm") != 0) { + free(test_c); + return -1; + } + free(test_c); + + if (isblind(db_cwd)) { + /* assume corner cases passed as we have nothing better to do */ + for(n = 0; n < num_res; n++) + res[n] = strclone("unknown"); + unlink(fn); + free(fn); + return 0; + } + + fn_esc = 
shell_escape_dup(fn); + cmd = malloc(strlen(fn_esc) + 32); + for(n = 0; n < num_res; n++) { + sprintf(cmd, "%s %d", fn_esc, n); + if (run(logdepth+1, cmd, &out) == 0) { + char *s, *dot = NULL; + int is_int = 1; + for(s = out; *s != '\0'; s++) { + switch(*s) { + case '\n': + case '\r': + *s = '\0'; + goto at_end; + case '.': + dot = s; + break; + default: + if ((dot != NULL) && (*s != '0')) + is_int = 0; + *s = tolower(*s); + } + } + at_end:; + if ((is_int) && (dot != NULL)) + *dot = '\0'; + if (*out == '+') + res[n] = strclone(out+1); + else + res[n] = strclone(out); + free(out); + } + else + res[n] = strclone("fpe"); + } + free(cmd); + free(fn_esc); + unlink(fn); + free(fn); + return 1; +} + + +static int find_math_cc(const char *name, int logdepth, int fatal, char *func, char *inp, char **node_names, int num_tests) +{ + char *res[256]; + char node_name[256]; + int n, ret; + + ret = 0; + require("cc/cc", logdepth, fatal); + + report("Checking for %s() corner cases... ", func); + logprintf(logdepth, "find_math_cc_log: Checking for %s() corner cases... \n", func); + logdepth++; + + if (try(logdepth, func, inp, res, num_tests) >= 0) { + for(n = 0; n < num_tests; n++) { + char *sep; + report("."); + sep = strchr(res[n], ':'); + if (sep != NULL) { + *sep = 0; + sep++; + sprintf(node_name, "libs/math/cc/%s/%s/return", func, node_names[n]); + put(node_name, res[n]); + sprintf(node_name, "libs/math/cc/%s/%s/errno", func, node_names[n]); + put(node_name, sep); + } + else + ret = 1; + } + } + + /* avoid redetection */ + sprintf(node_name, "libs/math/cc/%s/presents", func); + put(node_name, strue); + + + report(ret ? 
"done with errors\n" : " done.\n"); + return ret; +} + +int find_math_cc_log(const char *name, int logdepth, int fatal) +{ + char *inp = "+0.0, -0.0, 1.0, -1.0, 1/0.0"; + char *nodes[] = {"p_0", "m_0", "p_1", "m_1", "p_inf"}; + return find_math_cc(name, logdepth, fatal, "log", inp, nodes, sizeof(nodes) / sizeof(char *)); +} diff --git a/scconfig/src/math/find_mfunc_cc.h b/scconfig/src/math/find_mfunc_cc.h new file mode 100644 index 0000000..e84831d --- /dev/null +++ b/scconfig/src/math/find_mfunc_cc.h @@ -0,0 +1 @@ +int find_math_cc_log(const char *name, int logdepth, int fatal); diff --git a/scconfig/src/parsgen/INIT.c b/scconfig/src/parsgen/INIT.c new file mode 100644 index 0000000..f0f30d6 --- /dev/null +++ b/scconfig/src/parsgen/INIT.c @@ -0,0 +1,2 @@ + deps_parsgen_init(); + diff --git a/scconfig/src/parsgen/INIT.h b/scconfig/src/parsgen/INIT.h new file mode 100644 index 0000000..248ce97 --- /dev/null +++ b/scconfig/src/parsgen/INIT.h @@ -0,0 +1 @@ +void deps_parsgen_init(); diff --git a/scconfig/src/parsgen/Makefile.plugin b/scconfig/src/parsgen/Makefile.plugin new file mode 100644 index 0000000..8f2c8bb --- /dev/null +++ b/scconfig/src/parsgen/Makefile.plugin @@ -0,0 +1,6 @@ +PARSGEN_CFLAGS = -DPLUGIN_PARSGEN +PARSGEN_OBJS = \ + $(BIN)/parsgen/find_parsgen.o + +$(BIN)/parsgen/find_parsgen.o: $(SRC)/parsgen/find_parsgen.c + $(CC) $(CFLAGS) -c $(SRC)/parsgen/find_parsgen.c -o $(BIN)/parsgen/find_parsgen.o diff --git a/scconfig/src/parsgen/find_parsgen.c b/scconfig/src/parsgen/find_parsgen.c new file mode 100644 index 0000000..9aab1fe --- /dev/null +++ b/scconfig/src/parsgen/find_parsgen.c @@ -0,0 +1,139 @@ +/* + scconfig - parser generator detection + Copyright (C) 2009 Tibor Palinkas + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Project page: http://repo.hu/projects/scconfig + Contact via email: scconfig [at] igor2.repo.hu +*/ + +#include +#include +#include +#include "libs.h" +#include "log.h" +#include "db.h" +#include "dep.h" + +int find_parsgen_flex(const char *name, int logdepth, int fatal) +{ + const char *test_flex = + NL "%%" + NL "foo { return 1; }" + NL "%%" + NL ; + char *out, *temp_in, *temp_in_esc, *cmd; + int ret; + char *lexfile = "lex.yy.c"; + (void) fatal; /* not used */ + + report("Checking for flex... "); + logprintf(logdepth, "find_flex: trying to find flex...\n"); + logdepth++; + + if (is_file(lexfile)) { + report("ERROR: %s exists, and I don't dare to delete it. Can't test flex, please remove the file by hand.\n", lexfile); + logprintf(logdepth, "ERROR: %s exists, and I don't dare to delete it. 
Can't test flex, please remove the file by hand.\n", lexfile); + exit(1); + } + temp_in = tempfile_dump(test_flex, ".lex"); + temp_in_esc = shell_escape_dup(temp_in); + cmd = malloc(strlen(temp_in_esc) + 16); + sprintf(cmd, "flex %s", temp_in_esc); + free(temp_in_esc); + ret = run(logdepth, cmd, &out); + remove(temp_in); + free(temp_in); + if (out != NULL) + free(out); + + if (is_file(lexfile)) { + remove(lexfile); + if (ret == 0) { + put("parsgen/flex", "flex"); + put("parsgen/flex/presents", strue); + report("Found.\n"); + return 0; + } + } + + put("parsgen/flex/presents", sfalse); + report("Not found.\n"); + return 1; +} + +int find_parsgen_bison(const char *name, int logdepth, int fatal) +{ + const char *test_bison = + NL "%union { char *str; double num;}" + NL "%%" + NL "%token TOK1;" + NL "%token TOK2;" + NL "root: one | two;" + NL "one: TOK1;" + NL "two: TOK2;" + NL ; + char *out, *temp_in, *temp_in_esc, *cmd; + int ret; + char *bisfile, *s; + (void) fatal; /* not used */ + + report("Checking for bison... "); + logprintf(logdepth, "find_bison: trying to find bison...\n"); + logdepth++; + + temp_in = tempfile_dump(test_bison, ".y"); + bisfile = malloc(strlen(temp_in) + 32); + strcpy(bisfile, temp_in); + s = strrchr(bisfile+1, '.'); + strcpy(s, ".tab.c"); + if (is_file(bisfile)) { + report("ERROR: %s exists, and I don't dare to delete it. Can't test bison, please remove the file by hand.\n", bisfile); + logprintf(logdepth, "ERROR: %s exists, and I don't dare to delete it. 
Can't test bison, please remove the file by hand.\n", bisfile); + exit(1); + } + temp_in_esc = shell_escape_dup(temp_in); + cmd = malloc(strlen(temp_in_esc) + 16); + sprintf(cmd, "bison %s", temp_in_esc); + free(temp_in_esc); + + + ret = run(logdepth, cmd, &out); + remove(temp_in); + free(temp_in); + if (out != NULL) + free(out); + + if (is_file(bisfile)) { + remove(bisfile); + if (ret == 0) { + put("parsgen/bison", "bison"); + put("parsgen/bison/presents", strue); + report("Found.\n"); + return 0; + } + } + + put("parsgen/bison/presents", sfalse); + report("Not found.\n"); + return 1; +} + +void deps_parsgen_init() +{ + dep_add("parsgen/flex/*", find_parsgen_flex); + dep_add("parsgen/bison/*", find_parsgen_bison); +} diff --git a/scconfig/src/tmpasm/Makefile b/scconfig/src/tmpasm/Makefile new file mode 100644 index 0000000..a41fe0a --- /dev/null +++ b/scconfig/src/tmpasm/Makefile @@ -0,0 +1,16 @@ +CFLAGS = -Wall -g \ + -I../default -DTMPASM_TESTER \ + +tester: tester.o tmpasm.o debug.o tmpasm_scconfig.o openfiles.o \ + ../default/db.o ../default/ht.o ../default/str.o ../default/log.o \ + ../default/regex.o ../default/lib_uniqinc.o + +tmpasm.o: tmpasm.c tmpasm.h + +test: regression/Makefile + cd regression && make + +regression/Makefile: regression/Makefile.in tester + ./tester -e < regression/Makefile.in > regression/Makefile + +debug.o: debug.c debug.h tmpasm.h diff --git a/scconfig/src/tmpasm/Makefile.plugin b/scconfig/src/tmpasm/Makefile.plugin new file mode 100644 index 0000000..1c85124 --- /dev/null +++ b/scconfig/src/tmpasm/Makefile.plugin @@ -0,0 +1,16 @@ +TMPASM_OBJS = \ + $(BIN)/tmpasm/tmpasm.o \ + $(BIN)/tmpasm/tmpasm_scconfig.o \ + $(BIN)/tmpasm/openfiles.o + +TMPASM_CFLAGS = -I$(SRC)/tmpasm + +$(BIN)/tmpasm/tmpasm.o: $(SRC)/tmpasm/tmpasm.c $(SRC)/tmpasm/tmpasm.h $(SRC)/default/dep.h $(SRC)/default/log.h $(SRC)/default/regex.h + $(CC) $(CFLAGS) -c $(SRC)/tmpasm/tmpasm.c -o $(BIN)/tmpasm/tmpasm.o + +$(BIN)/tmpasm/tmpasm_scconfig.o: 
$(SRC)/tmpasm/tmpasm_scconfig.c $(SRC)/tmpasm/tmpasm.h $(SRC)/default/libs.h $(SRC)/default/log.h $(SRC)/default/regex.h + $(CC) $(CFLAGS) -c $(SRC)/tmpasm/tmpasm_scconfig.c -o $(BIN)/tmpasm/tmpasm_scconfig.o + +$(BIN)/tmpasm/openfiles.o: $(SRC)/tmpasm/openfiles.c + $(CC) $(CFLAGS) -c $(SRC)/tmpasm/openfiles.c -o $(BIN)/tmpasm/openfiles.o + diff --git a/scconfig/src/tmpasm/TODO b/scconfig/src/tmpasm/TODO new file mode 100644 index 0000000..a31b122 --- /dev/null +++ b/scconfig/src/tmpasm/TODO @@ -0,0 +1,12 @@ +- regression test syntax errors and improve syntax error reporting + - switch: + - data instead of case + - multiple defaults + - deafult must be at end of the list + - case out of switch + - default out of switch + - forarch +- tutorial + - append +- [[]] eval? what's the output? -> generate and include scripts (may need mktemp binding) +- update docs diff --git a/scconfig/src/tmpasm/debug.c b/scconfig/src/tmpasm/debug.c new file mode 100644 index 0000000..f56acc9 --- /dev/null +++ b/scconfig/src/tmpasm/debug.c @@ -0,0 +1,111 @@ +#include +#include "tmpasm.h" + +static void indent(FILE *f, int depth) +{ + for(;depth > 0; depth--) fputc(' ', f); +} + +static void print_arg(FILE *f, tmpasm_arg_t *a) +{ + if (a == NULL) { + fprintf(f, "*NULL - broken AST*"); + return; + } + if (a->next != NULL) { + /* block mode */ + fprintf(f, "[~"); + for(;a != NULL; a = a->next) { + if (a->is_addr) + fprintf(f, "~%s~", a->data); + else + fprintf(f, "%s", a->data); + } + fprintf(f, "~]"); + } + else { + if (a->is_addr) + fprintf(f, "%s", a->data); + else + fprintf(f, "{%s}", a->data); + } +} + +static void print_loc(FILE *f, tmpasm_exec_t *c) +{ + if ((c->line != 0) || (c->col != 0)) + fprintf(f, " [at %d:%d]\n", c->line, c->col); + else + fprintf(f, "\n"); +} + +static void dump(FILE *f, int depth, tmpasm_exec_t *c) +{ + tmpasm_case_t *cc; + int n; + for(; c != NULL; c = c->next) { + switch(c->kw) { + case KW_NOP: + indent(f, depth); + fprintf(f, "(NOP)"); + print_loc(f, c); 
+ break; + case KW_none: + indent(f, depth); + fprintf(f, "%s", c->payload.instr.call_name); + print_loc(f, c); + for(n = 0; n < c->payload.instr.argc; n++) { + indent(f, depth+1); + fprintf(f, "arg: "); + print_arg(f, c->payload.instr.argv[n]); + fprintf(f, "\n"); + } + break; + case KW_IF: + indent(f, depth); + fprintf(f, "if "); + print_arg(f, c->payload.fc_if.cond); + print_loc(f, c); + indent(f, depth); + fprintf(f, "then:\n"); + dump(f, depth+1, c->payload.fc_if.code_then); + indent(f, depth); + fprintf(f, "else:\n"); + dump(f, depth+1, c->payload.fc_if.code_else); + break; + case KW_FOREACH: + indent(f, depth); + fprintf(f, "foreach %s in ", c->payload.fc_foreach.loop_var); + print_arg(f, c->payload.fc_foreach.data); + print_loc(f, c); + dump(f, depth+1, c->payload.fc_foreach.code_body); + break; + case KW_SWITCH: + indent(f, depth); + fprintf(f, "switch "); + print_arg(f, c->payload.fc_switch.cond); + print_loc(f, c); + for(cc = c->payload.fc_switch.first; cc != NULL; cc = cc->next) { + indent(f, depth+1); + if (cc->data != NULL) { + fprintf(f, "case "); + print_arg(f, cc->data); + fprintf(f, "\n"); + } + else /* a case without data is the default branch; write it to f like the rest of the dump, not to stdout */ + fprintf(f, "default\n"); + dump(f, depth+2, cc->body); + } + break; + default: + indent(f, depth); + fprintf(f, "invalid kw "); + print_loc(f, c); + } + } +} + +void tmpasm_dump(tmpasm_t *ctx, FILE *f) +{ + dump(f, 0, ctx->code); +} diff --git a/scconfig/src/tmpasm/debug.h b/scconfig/src/tmpasm/debug.h new file mode 100644 index 0000000..7841be1 --- /dev/null +++ b/scconfig/src/tmpasm/debug.h @@ -0,0 +1,2 @@ +void tmpasm_dump(tmpasm_t *ctx, FILE *f); + diff --git a/scconfig/src/tmpasm/openfiles.c b/scconfig/src/tmpasm/openfiles.c new file mode 100644 index 0000000..92ac58d --- /dev/null +++ b/scconfig/src/tmpasm/openfiles.c @@ -0,0 +1,95 @@ +#include +#include +#include "openfiles.h" +#include "libs.h" + +static openfile_t *find_file_by_name(openfiles_t *of, const char *name, int alloc, const char *mode, int recursion) +{ + int n; + struct stat buf; + 
FILE *f; + + if (recursion > 4) { + fprintf(stderr, "scconfig internal error: openfiles infinite recursion for %s\n", name); + abort(); + } + + if (stat(name, &buf) != 0) { + /* File does not exist - try to create it or return NULL */ + if (*mode == 'w') { + f = fopen(name, "w"); + if (f == NULL) + return NULL; + fclose(f); + return find_file_by_name(of, name, alloc, mode, recursion + 1); + } + return NULL; + } + + /* look for an existing open file in the list */ + for(n = 0; n < of->used; n++) + if ((of->files[n].dev == buf.st_dev) && (of->files[n].ino == buf.st_ino) && (strcmp(of->files[n].mode, mode) == 0)) + return &(of->files[n]); + + if (!alloc) + return NULL; + + /* File exists but not on the list yet, allocate a new slot for it */ + /* TODO: try to find an empty slot first */ + if (of->used >= of->alloced) { + of->alloced += 16; + of->files = realloc(of->files, sizeof(openfile_t) * of->alloced); + } + + n = of->used; + of->files[n].dev = buf.st_dev; + of->files[n].ino = buf.st_ino; + of->files[n].f = NULL; + of->files[n].mode = strclone(mode); + of->used++; + return &(of->files[n]); +} + +void release(openfile_t *o) +{ + if (o->mode != NULL) { + free(o->mode); + o->mode = NULL; + } + if (o->f != NULL) { + fclose(o->f); + o->f = NULL; + } + o->dev = -1; + o->ino = -1; +} + +FILE *openfile_open(openfiles_t *of, const char *fn, const char *mode) +{ + openfile_t *o; + o = find_file_by_name(of, fn, 1, mode, 0); + if (o == NULL) + return NULL; + o->f = fopen(fn, mode); + if (o->f == NULL) { + release(o); + return NULL; + } + return o->f; +} + +void openfile_closeall(openfiles_t *of) +{ + int n; + if (of->files == NULL) + return; + for(n = 0; n < of->used; n++) + release(&(of->files[n])); +} + +void openfile_free(openfiles_t *of) +{ + openfile_closeall(of); + if (of->files != NULL) + free(of->files); +} diff --git a/scconfig/src/tmpasm/openfiles.h b/scconfig/src/tmpasm/openfiles.h new file mode 100644 index 0000000..bb378c6 --- /dev/null +++ 
b/scconfig/src/tmpasm/openfiles.h @@ -0,0 +1,23 @@ +#include +#include +#include +#include + +typedef struct openfile_s { + FILE *f; + + /* identify the file: */ + dev_t dev; + ino_t ino; + + char *mode; +} openfile_t; + +typedef struct openfiles_s { + int alloced, used; + openfile_t *files; +} openfiles_t; + +FILE *openfile_open(openfiles_t *of, const char *fn, const char *mode); +void openfile_closeall(openfiles_t *of); +void openfile_free(openfiles_t *of); diff --git a/scconfig/src/tmpasm/regression/Makefile.in b/scconfig/src/tmpasm/regression/Makefile.in new file mode 100644 index 0000000..5a7b73e --- /dev/null +++ b/scconfig/src/tmpasm/regression/Makefile.in @@ -0,0 +1,57 @@ +# list of tests +put tests [@ + Tutor01_hello + Tutor02_vars + Tutor03_blocks + Tutor04_if + Tutor05_switch + Tutor06_foreach + Tutor07_sub + Tutor08_uniq + Tutor09_ui + Tutor10_include_redir + Tutor11_missing + Tutor12_halt + comment + foreach + if + switch + test + then + append + order + err_if_end + err_if_else + err_excess_end + err_switch_end + err_switch_nocond + err_no_end +@] + +uniq tests + +put test_diffs tests +gsub test_diffs {[ \t\r\n]+} { } +gsub test_diffs {\\>} {.diff} +# TODO: replace this with wrap +sub test_diffs {^ *} {} +sub test_diffs { *$} {} + +print [@### PLEASE DO NOT EDIT THIS FILE, it has been generated from Makefile.in. 
### + +TESTER=../tester + +all: @test_diffs@ + +# Explicit test rules +@] + +foreach test in tests +print [@ +@test@.out: @test@.gasm $(TESTER) Makefile + $(TESTER) < @test@.gasm > @test@.out 2>&1 + +@test@.diff: @test@.ref @test@.out + diff -u @test@.ref @test@.out +@] +end diff --git a/scconfig/src/tmpasm/regression/Tutor01_hello.gasm b/scconfig/src/tmpasm/regression/Tutor01_hello.gasm new file mode 100644 index 0000000..d37cb2b --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor01_hello.gasm @@ -0,0 +1,18 @@ +# Comments start with # and end at the end of the line; comments +# can be started anywhere outside of strings and blocks + +# Print will print each argument without appending a newline. An argument +# is a data, string literals are written in brace. Escape sequences +# are as usual +print {hello world!\n} + +# Print doesn't have any hidden side effect: no separators printed +# between arguments +print {hello} {world!} {\n} + +# Print works without an argument as well: it just prints nothing +print + +# instructions are separated by newlines and/or semicolons: +print {HELLO}; print { WORLD!};;;; print {\n}; + diff --git a/scconfig/src/tmpasm/regression/Tutor01_hello.ref b/scconfig/src/tmpasm/regression/Tutor01_hello.ref new file mode 100644 index 0000000..bfb644b --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor01_hello.ref @@ -0,0 +1,16 @@ +print [at 7:1] + arg: {hello world! +} +print [at 11:1] + arg: {hello} + arg: {world!} + arg: { +} +print [at 14:1] +print [at 17:1] + arg: {HELLO} +print [at 17:16] + arg: { WORLD!} +print [at 17:36] + arg: { +} diff --git a/scconfig/src/tmpasm/regression/Tutor02_vars.gasm b/scconfig/src/tmpasm/regression/Tutor02_vars.gasm new file mode 100644 index 0000000..5b1526b --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor02_vars.gasm @@ -0,0 +1,26 @@ +# Variables are stored in a hash. 
Variable names follow the normal +# identifier rules with fewer restrictions (can start with number) +# To set the value of a variable, use: put var value +put myvar {Hello world!\n} + +# Referencing the variable is done by using its name +print myvar + +# In most contexts arguments are data; data can be both string literal and +# variable reference: +print {Hello universe! } myvar + +# the second var of put is just data, can be string or variable; copying +# a variable: +put str {cats raining from the sky} +put tmp str +print str {==} tmp {\n} + +# the ? prefix results in empty string if a variable doesn't exist, instead +# of throwing a runtime error +print {safe get: '} ?nonexist {'\n} + +# the & prefix evaluates to "true" or "false" depending on whether the node +# exists in the tree or not +print {exists (no): } &nonexist {\n} +print {exists (yes): } &myvar {\n} diff --git a/scconfig/src/tmpasm/regression/Tutor02_vars.ref b/scconfig/src/tmpasm/regression/Tutor02_vars.ref new file mode 100644 index 0000000..9f19f02 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor02_vars.ref @@ -0,0 +1,36 @@ +put [at 4:1] + arg: myvar + arg: {Hello world! +} +print [at 7:1] + arg: myvar +print [at 11:1] + arg: {Hello universe! 
} + arg: myvar +put [at 15:1] + arg: str + arg: {cats raining from the sky} +put [at 16:1] + arg: tmp + arg: str +print [at 17:1] + arg: str + arg: {==} + arg: tmp + arg: { +} +print [at 21:1] + arg: {safe get: '} + arg: ?nonexist + arg: {' +} +print [at 25:1] + arg: {exists (no): } + arg: &nonexist + arg: { +} +print [at 26:1] + arg: {exists (yes): } + arg: &myvar + arg: { +} diff --git a/scconfig/src/tmpasm/regression/Tutor03_blocks.gasm b/scconfig/src/tmpasm/regression/Tutor03_blocks.gasm new file mode 100644 index 0000000..18003e0 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor03_blocks.gasm @@ -0,0 +1,31 @@ +# Blocks are special syntax to make it easier to handle large, +# to-be-printed-verbatim blocks of data, which is essential in a template +# language. Blocks are enclosed in [$ $], where $ is an arbitrary character +# that shall be chosen by the programmer, per block; once a character is +# chosen, it cannot appear in the string. There is no backslash escaping +# in blocks. Blocks can be used anywhere where strings could be used. +print [@this is a string@] {\n} + +print {--\n} + +put myblk [!a block +of multiline +data. +!] + +print myblk + +# A special feature of the block is inline variable substitution using the same +# separator character chosen at the opening brace. In the example below +# myvar is substituted because it is surrounded by the separator (@ in +# the first case and $ in the second case). Whitespace and newlines +# are preserved in the block, even in the inline variable name! + +put myvar {world} +print {--\n} +print [@ hello @myvar@! @] {\n} +print {--\n} +print [$ + hi @myvar@! 
+$] +print {--\n} diff --git a/scconfig/src/tmpasm/regression/Tutor03_blocks.ref b/scconfig/src/tmpasm/regression/Tutor03_blocks.ref new file mode 100644 index 0000000..e091176 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor03_blocks.ref @@ -0,0 +1,35 @@ +print [at 7:1] + arg: {this is a string} + arg: { +} +print [at 9:1] + arg: {-- +} +put [at 11:1] + arg: myblk + arg: {a block +of multiline +data. +} +print [at 16:1] + arg: myblk +put [at 24:1] + arg: myvar + arg: {world} +print [at 25:1] + arg: {-- +} +print [at 26:1] + arg: [~ hello ~myvar~! ~] + arg: { +} +print [at 27:1] + arg: {-- +} +print [at 28:1] + arg: { + hi @myvar@! +} +print [at 31:1] + arg: {-- +} diff --git a/scconfig/src/tmpasm/regression/Tutor04_if.gasm b/scconfig/src/tmpasm/regression/Tutor04_if.gasm new file mode 100644 index 0000000..3b45fa7 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor04_if.gasm @@ -0,0 +1,31 @@ +put myvar {true} + +# The simplest flow control is an if. It takes its first argument and +# calls the environment to decide if it is true or false. If it's true +# the "then" branch is executed, if it's false, the "else" branch runs. 
+if myvar then + print {myvar is true (1)\n} +else + print {myvar is false (1)\n} +end + + +# it is possible to omit the else branch +if myvar then + print {myvar is true (2)\n} +end + +# the then branch may be empty: +if myvar then else + print {myvar is false (3)\n} +end + +# embedding controls is legal: +put foo {false} +if myvar then + if foo then + print {myvar and bar are true (4)\n} + else + print {myvar is true, bar is false (4)\n} + end +end diff --git a/scconfig/src/tmpasm/regression/Tutor04_if.ref b/scconfig/src/tmpasm/regression/Tutor04_if.ref new file mode 100644 index 0000000..5680761 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor04_if.ref @@ -0,0 +1,42 @@ +put [at 1:1] + arg: myvar + arg: {true} +if myvar [at 6:10] +then: + print [at 7:2] + arg: {myvar is true (1) +} +else: + print [at 9:2] + arg: {myvar is false (1) +} +if myvar [at 14:10] +then: + print [at 15:2] + arg: {myvar is true (2) +} +else: + (NOP) +if myvar [at 19:10] +then: + (NOP) +else: + print [at 20:2] + arg: {myvar is false (3) +} +put [at 24:1] + arg: foo + arg: {false} +if myvar [at 25:10] +then: + if foo [at 26:9] + then: + print [at 27:3] + arg: {myvar and bar are true (4) +} + else: + print [at 29:3] + arg: {myvar is true, bar is false (4) +} +else: + (NOP) diff --git a/scconfig/src/tmpasm/regression/Tutor05_switch.gasm b/scconfig/src/tmpasm/regression/Tutor05_switch.gasm new file mode 100644 index 0000000..6e0aa88 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor05_switch.gasm @@ -0,0 +1,42 @@ +# Switch is similar to case in posix shell and switch in C. It takes a +# data argument and matches it against cases until the first match. It +# executes the code for that match and stops executing the switch (unlike +# in C, and like in sh, there is no fall-thru). A default case can be +# defined as a catch-all rule. +# +# Scconfig uses regex matching (unlike sh (shell globbing) and C (integer)). 
+# +# The first word after the switch keyword is the string that is matched +# against case patterns; the first word after a case is the pattern +# the string is matched against. Each branch must be terminated by an "end", +# just as the whole switch. Default doesn't have pattern, instructions start +# immediately. + +put myvar {foobar} +switch myvar + case {baz} put res {1}; print {this is baz.\n}; end; + case {^oob} put res {2}; print {did you mean out-of-band?\n}; end; + case {^f} put res {3}; print {starts with f.\n}; end; + case {oob} put res {4}; print {OOB!\n}; end; + default put res {0}; print {none.\n}; end; +end; + +print {result is: } res {\n} + + +# data is data - can be block as well, anywhere, in switch or case: +put patt {^number$} +put REF {3} +switch [@num @res@ ber@] + case patt print {empty\n}; end; + case [!^num !REF!!] print {reference\n}; end; +end + +# one of the uses of switch is to construct an if-then-else that uses +# matching instead of checking for true/false; the following example +# demonstrates how an "if cond matches {lob}" is done with switch. +put cond {blobb} +switch cond + case {lob} print {"then"\n}; end + default print {"else"\n}; end +end diff --git a/scconfig/src/tmpasm/regression/Tutor05_switch.ref b/scconfig/src/tmpasm/regression/Tutor05_switch.ref new file mode 100644 index 0000000..00ba084 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor05_switch.ref @@ -0,0 +1,71 @@ +put [at 15:1] + arg: myvar + arg: {foobar} +switch myvar [at 16:1] + case {baz} + put [at 17:14] + arg: res + arg: {1} + print [at 17:27] + arg: {this is baz. +} + case {^oob} + put [at 18:14] + arg: res + arg: {2} + print [at 18:27] + arg: {did you mean out-of-band? +} + case {^f} + put [at 19:14] + arg: res + arg: {3} + print [at 19:27] + arg: {starts with f. +} + case {oob} + put [at 20:14] + arg: res + arg: {4} + print [at 20:27] + arg: {OOB! +} + default + put [at 21:14] + arg: res + arg: {0} + print [at 21:27] + arg: {none. 
+} +print [at 24:1] + arg: {result is: } + arg: res + arg: { +} +put [at 28:1] + arg: patt + arg: {^number$} +put [at 29:1] + arg: REF + arg: {3} +switch [~num ~res~ ber~] [at 30:1] + case patt + print [at 31:23] + arg: {empty +} + case [~^num ~REF~~] + print [at 32:23] + arg: {reference +} +put [at 38:1] + arg: cond + arg: {blobb} +switch cond [at 39:1] + case {lob} + print [at 40:15] + arg: {"then" +} + default + print [at 41:15] + arg: {"else" +} diff --git a/scconfig/src/tmpasm/regression/Tutor06_foreach.gasm b/scconfig/src/tmpasm/regression/Tutor06_foreach.gasm new file mode 100644 index 0000000..c0e57fc --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor06_foreach.gasm @@ -0,0 +1,35 @@ +# The only loop tmpasm implements is a foreach that iterates on a list. +# How the list is split into items is up to the environment. In scconfig +# the list is white space separated by default. The word following foreach +# must be the name of a variable (that will be set to the next item before +# each iteration). The word after "in" is data (string, variable, block). + +foreach item in {this is a list of words} + print item {\n} +end + +# Like any other control, foreach can be nested. The following +# example will iterate item on foo, bar and baz, printing 3 words +# for each from a block: {next:}, the item and a newline. The newline +# is specified as a variable since \ escaping does not work in blocks. +put nl {\n} +foreach item in {foo bar baz} + foreach w in [@next: @item@@nl@@] + print w + end +end + +# Foreach makes a copy of the list before the first iteration. This +# is relevant if the list is a variable that may change during the +# loop. The following example takes a list of libs and if -lsdl is +# present on the list, appends -lsvga to the list and inserts -lm; +# these changes to "libs" will not alter the loop. 
+put libs {-lsdl -ltcl8.4} +foreach l in libs + print {l=} l {\n} + switch l + case {^-lsdl} put libs [@-lm @libs@ -lsvga@]; end + end +end +print {libs=} libs {\n} + diff --git a/scconfig/src/tmpasm/regression/Tutor06_foreach.ref b/scconfig/src/tmpasm/regression/Tutor06_foreach.ref new file mode 100644 index 0000000..f8fccde --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor06_foreach.ref @@ -0,0 +1,32 @@ +foreach item in {this is a list of words} [at 7:1] + print [at 8:2] + arg: item + arg: { +} +put [at 15:1] + arg: nl + arg: { +} +foreach item in {foo bar baz} [at 16:1] + foreach w in [~next: ~item~~nl~~] [at 17:2] + print [at 18:3] + arg: w +put [at 27:1] + arg: libs + arg: {-lsdl -ltcl8.4} +foreach l in libs [at 28:1] + print [at 29:2] + arg: {l=} + arg: l + arg: { +} + switch l [at 30:2] + case {^-lsdl} + put [at 31:19] + arg: libs + arg: [~-lm ~libs~ -lsvga~] +print [at 34:1] + arg: {libs=} + arg: libs + arg: { +} diff --git a/scconfig/src/tmpasm/regression/Tutor07_sub.gasm b/scconfig/src/tmpasm/regression/Tutor07_sub.gasm new file mode 100644 index 0000000..ee3f4e2 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor07_sub.gasm @@ -0,0 +1,31 @@ +# The following regex sub utils are scconfig specific. + +# Regex: substitute the first match of a pattern with str in a variable: +# sub address pattern str + +put myvar {Hello world!\n} +sub myvar {l} {2} +print myvar + +# Address must resolve to an existing variable, pattern and str are data; +# this means address can be a string that holds a variable name, it's the +# same as if it was an address: +sub {myvar} {l} {3} +print myvar + +# Or it can be a block, which makes indirect addressing possible: +# in [@@pointer@@] the @pointer@ part will be substituted with +# the value of pointer, which is "myvar". 
+put pointer {myvar} +sub [@@pointer@@] {l} {4} +print myvar + +# Since pattern and str are also data, address and blocks work there as well +# (but this is _not_ a regex backreference): +put punctuation {[!?.]} +sub [@@pointer@@] punctuation [@ PUNCT:@punctuation@@] +print myvar + +# gsub does the same, but substitutes all matches, not only the first: +gsub [@@pointer@@] {o} {_0_} +print myvar diff --git a/scconfig/src/tmpasm/regression/Tutor07_sub.ref b/scconfig/src/tmpasm/regression/Tutor07_sub.ref new file mode 100644 index 0000000..a59ad25 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor07_sub.ref @@ -0,0 +1,40 @@ +put [at 6:1] + arg: myvar + arg: {Hello world! +} +sub [at 7:1] + arg: myvar + arg: {l} + arg: {2} +print [at 8:1] + arg: myvar +sub [at 13:1] + arg: {myvar} + arg: {l} + arg: {3} +print [at 14:1] + arg: myvar +put [at 19:1] + arg: pointer + arg: {myvar} +sub [at 20:1] + arg: [~~pointer~~] + arg: {l} + arg: {4} +print [at 21:1] + arg: myvar +put [at 25:1] + arg: punctuation + arg: {[!?.]} +sub [at 26:1] + arg: [~~pointer~~] + arg: punctuation + arg: [~ PUNCT:~punctuation~~] +print [at 27:1] + arg: myvar +gsub [at 30:1] + arg: [~~pointer~~] + arg: {o} + arg: {_0_} +print [at 31:1] + arg: myvar diff --git a/scconfig/src/tmpasm/regression/Tutor08_uniq.gasm b/scconfig/src/tmpasm/regression/Tutor08_uniq.gasm new file mode 100644 index 0000000..5b3d093 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor08_uniq.gasm @@ -0,0 +1,64 @@ +# The following string util is scconfig specific. + +# Uniq: filter a list of words and remove duplicate items. This instruction +# is useful for using text nodes as lists. +# The simplest syntax is "uniq address" which will do the filtering on +# the content of a database address. The default separator is \n +put list [@this +is +a +list +of +words, +a +list +of +words.
+@] +print {original:\n} list {\n} +uniq list +print {uniq:\n} list {\n} + +# If the original list needs to be left intact, the alternative syntax is +# "uniq dest-addr src-addr": +put foo [@this +foo +is +a +this +foo +@] +uniq tmp foo +print {original:\n} foo {\nuniq:\n} tmp {\n} + +# Note: the algorithm of uniq is slow, and will not be efficient for very long +# lists. Uniq preserves the order of words (by their first appearance). + +# Sortuniq performs the same action, except it also orders the list using +# qsort() (so it is even slower on big lists). +sortuniq tmp foo +print {\nsortuniq:\n} tmp {\n} + + +# A typical use case is having #defines and #includes on a list; #defines +# should end up on the top, but order of #includes should be preserved, so +# sortuniq is not an option. When uniq is called with more than 2 arguments, +# the extra arguments specify group regexps; the input is first organized into +# groups then uniq is run on these groups. Anything that doesn't match the +# groups listed is put in a "misc" group that ends up as the last group. +put list [@#define foo +#include "foo20.h" +#include "foo10.h" +/* misc1 */ +#define bar +#include "bar1.h" +#include "bar2.h" +/* misc2 */ +@] + +# set input field separator to \n so uniq is splitting by lines, not by +# words +put /tmpasm/IFS {\n} + +uniq tmp list {^#define} {^#include} +print {original:\n} list {\ngrouped uniq:\n} tmp {\n} diff --git a/scconfig/src/tmpasm/regression/Tutor08_uniq.ref b/scconfig/src/tmpasm/regression/Tutor08_uniq.ref new file mode 100644 index 0000000..752ca42 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor08_uniq.ref @@ -0,0 +1,89 @@ +put [at 7:1] + arg: list + arg: {this +is +a +list +of +words, +a +list +of +words.
+} +print [at 18:1] + arg: {original: +} + arg: list + arg: { +} +uniq [at 19:1] + arg: list +print [at 20:1] + arg: {uniq: +} + arg: list + arg: { +} +put [at 24:1] + arg: foo + arg: {this +foo +is +a +this +foo +} +uniq [at 31:1] + arg: tmp + arg: foo +print [at 32:1] + arg: {original: +} + arg: foo + arg: { +uniq: +} + arg: tmp + arg: { +} +sortuniq [at 39:1] + arg: tmp + arg: foo +print [at 40:1] + arg: { +sortuniq: +} + arg: tmp + arg: { +} +put [at 49:1] + arg: list + arg: {#define foo +#include "foo20.h" +#include "foo10.h" +/* misc1 */ +#define bar +#include "bar1.h" +#include "bar2.h" +/* misc2 */ +} +put [at 61:1] + arg: /tmpasm/IFS + arg: { +} +uniq [at 63:1] + arg: tmp + arg: list + arg: {^#define} + arg: {^#include} +print [at 64:1] + arg: {original: +} + arg: list + arg: { +grouped uniq: +} + arg: tmp + arg: { +} diff --git a/scconfig/src/tmpasm/regression/Tutor09_ui.gasm b/scconfig/src/tmpasm/regression/Tutor09_ui.gasm new file mode 100644 index 0000000..b4a7730 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor09_ui.gasm @@ -0,0 +1,15 @@ +# The following user interface utils are scconfig specific. + +# The report infrastructure is the main UI in scconfig. It prints +# messages to the console. The "report" instruction works similarly +# to print, but its output is always the console, immune to +# redirections and default file output (tmpasm is most commonly +# used for generating files, so the default output file is not +# the console but a file being generated) +put myvar {!\n} +report {hello } {world} myvar +report [@hello world@myvar@@] + +# If an error is detected during generation of a file, the script should abort. +# This is a direct call to abort(3). +abort diff --git a/scconfig/src/tmpasm/regression/Tutor09_ui.ref b/scconfig/src/tmpasm/regression/Tutor09_ui.ref new file mode 100644 index 0000000..2873077 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor09_ui.ref @@ -0,0 +1,11 @@ +put [at 9:1] + arg: myvar + arg: {!
+} +report [at 10:1] + arg: {hello } + arg: {world} + arg: myvar +report [at 11:1] + arg: [~hello world~myvar~~] +abort [at 15:1] diff --git a/scconfig/src/tmpasm/regression/Tutor10_include_redir.gasm b/scconfig/src/tmpasm/regression/Tutor10_include_redir.gasm new file mode 100644 index 0000000..0fb1b52 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor10_include_redir.gasm @@ -0,0 +1,52 @@ +# NOTE: THIS EXAMPLE WILL NOT WORK IN THE ON-LINE WEB VERSION + +# These features are scconfig specific. + +# There is a default output file coming from the environment; this +# is the file being generated. In the most common cases this is the +# only file the script will ever write. Any "print" instruction will +# write this file by default. However, sometimes it is handy +# to generate a small misc file during generating a large file. Thus +# the output file that "print" writes is not hardwired. Instead, there +# is the default output file and the current output file. Instruction +# "redir" can change the current output file. + +print {this goes to the default output\n} + +# redirect to Tutor10.inc; any "print" until the next "redir" will +# write that file +redir {Tutor10.inc} +print {# this is a generated file.} +print [@ + print {hello world from my include!\n} +@] + +# switch back to the default output +redir +print {back at default output.\n} + + +# Dynamic include: the script may include another script at runtime. When +# an include instruction is executed, the referred file is opened, read, +# parsed and executed, recursively.
+# +# Include being dynamic (or runtime) is unusual, but has the following +# advantages: +# - file name for inclusion can be calculated +# - conditional include is easily possible without an extra preprocessing layer +# - it is possible to generate a script on the fly and include it (sort of eval) +# Drawbacks: +# - it is possible to end up in an infinite loop that will only stop when +# resources run out (open fds or memory) +# - it is slow, e.g. if the body of a foreach contains include, the whole +# read-parse-execute procedure is repeated for each item +# Redir files are overwritten when first open from an execution. + +print {Include:\n} +include {Tutor10.inc} + +# NOTE: the above script works only because "redir" has a side effect: +# whenever redirection switches away from a file, that file is flushed. +# This happens even if the new current output is the same as the old +# current output (no actual switch takes place). + diff --git a/scconfig/src/tmpasm/regression/Tutor10_include_redir.ref b/scconfig/src/tmpasm/regression/Tutor10_include_redir.ref new file mode 100644 index 0000000..c1349f0 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor10_include_redir.ref @@ -0,0 +1,20 @@ +print [at 14:1] + arg: {this goes to the default output +} +redir [at 18:1] + arg: {Tutor10.inc} +print [at 19:1] + arg: {# this is a generated file.} +print [at 20:1] + arg: { + print {hello world from my include!\n} +} +redir [at 25:1] +print [at 26:1] + arg: {back at default output. +} +print [at 45:1] + arg: {Include: +} +include [at 46:1] + arg: {Tutor10.inc} diff --git a/scconfig/src/tmpasm/regression/Tutor11_missing.gasm b/scconfig/src/tmpasm/regression/Tutor11_missing.gasm new file mode 100644 index 0000000..9491210 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor11_missing.gasm @@ -0,0 +1,11 @@ +# conditional/safe variable reference: name prefixed by ? 
+# if the variable does not exist, no error is thrown but empty +# string is returned + +if ?a then + print {empty} +else + print {not empty} +end + +put b ?a diff --git a/scconfig/src/tmpasm/regression/Tutor11_missing.out b/scconfig/src/tmpasm/regression/Tutor11_missing.out new file mode 100644 index 0000000..0d67ac2 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor11_missing.out @@ -0,0 +1,10 @@ +if ?a [at 5:7] +then: + print [at 6:2] + arg: {empty} +else: + print [at 8:2] + arg: {not empty} +put [at 11:1] + arg: b + arg: ?a diff --git a/scconfig/src/tmpasm/regression/Tutor11_missing.ref b/scconfig/src/tmpasm/regression/Tutor11_missing.ref new file mode 100644 index 0000000..0d67ac2 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor11_missing.ref @@ -0,0 +1,10 @@ +if ?a [at 5:7] +then: + print [at 6:2] + arg: {empty} +else: + print [at 8:2] + arg: {not empty} +put [at 11:1] + arg: b + arg: ?a diff --git a/scconfig/src/tmpasm/regression/Tutor12_halt.gasm b/scconfig/src/tmpasm/regression/Tutor12_halt.gasm new file mode 100644 index 0000000..4b0587a --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor12_halt.gasm @@ -0,0 +1,14 @@ +# Instruction "halt" breaks the execution of the current script file +# (returning from the C call to tmpasm() or stop processing an "include", +# returning to executing the parent script). It is useful for cheap +# termination of the script file from deep inside nested loops/ifs. 
+ +put tmp {true} +if tmp then + foreach item in {foo bar true baz} + print item {\n} + if item then + halt + end + end +end diff --git a/scconfig/src/tmpasm/regression/Tutor12_halt.ref b/scconfig/src/tmpasm/regression/Tutor12_halt.ref new file mode 100644 index 0000000..8f02c44 --- /dev/null +++ b/scconfig/src/tmpasm/regression/Tutor12_halt.ref @@ -0,0 +1,17 @@ +put [at 6:1] + arg: tmp + arg: {true} +if tmp [at 7:8] +then: + foreach item in {foo bar true baz} [at 8:2] + print [at 9:3] + arg: item + arg: { +} + if item [at 10:11] + then: + halt [at 11:4] + else: + (NOP) +else: + (NOP) diff --git a/scconfig/src/tmpasm/regression/append.gasm b/scconfig/src/tmpasm/regression/append.gasm new file mode 100644 index 0000000..b38a10b --- /dev/null +++ b/scconfig/src/tmpasm/regression/append.gasm @@ -0,0 +1,6 @@ +append tmp {foo} +append tmp {bar} +append tmp {baz} +foreach n in tmp + print {-> } n {\n} +end diff --git a/scconfig/src/tmpasm/regression/append.ref b/scconfig/src/tmpasm/regression/append.ref new file mode 100644 index 0000000..b7fea30 --- /dev/null +++ b/scconfig/src/tmpasm/regression/append.ref @@ -0,0 +1,15 @@ +append [at 1:1] + arg: tmp + arg: {foo} +append [at 2:1] + arg: tmp + arg: {bar} +append [at 3:1] + arg: tmp + arg: {baz} +foreach n in tmp [at 4:1] + print [at 5:2] + arg: {-> } + arg: n + arg: { +} diff --git a/scconfig/src/tmpasm/regression/comment.gasm b/scconfig/src/tmpasm/regression/comment.gasm new file mode 100644 index 0000000..c907351 --- /dev/null +++ b/scconfig/src/tmpasm/regression/comment.gasm @@ -0,0 +1,3 @@ +print data1 # this is a comment; until the end of this line +print data2 # this shall be a second print + diff --git a/scconfig/src/tmpasm/regression/comment.ref b/scconfig/src/tmpasm/regression/comment.ref new file mode 100644 index 0000000..e1d68d1 --- /dev/null +++ b/scconfig/src/tmpasm/regression/comment.ref @@ -0,0 +1,4 @@ +print [at 1:1] + arg: data1 +print [at 2:1] + arg: data2 diff --git 
a/scconfig/src/tmpasm/regression/err_excess_end.gasm b/scconfig/src/tmpasm/regression/err_excess_end.gasm new file mode 100644 index 0000000..cace30d --- /dev/null +++ b/scconfig/src/tmpasm/regression/err_excess_end.gasm @@ -0,0 +1,3 @@ +if {1} then +end +end diff --git a/scconfig/src/tmpasm/regression/err_excess_end.ref b/scconfig/src/tmpasm/regression/err_excess_end.ref new file mode 100644 index 0000000..73807fb --- /dev/null +++ b/scconfig/src/tmpasm/regression/err_excess_end.ref @@ -0,0 +1,6 @@ +error: Excess "end" at 3:4 +if {1} [at 1:8] +then: + (NOP) +else: + (NOP) diff --git a/scconfig/src/tmpasm/regression/err_if_else.gasm b/scconfig/src/tmpasm/regression/err_if_else.gasm new file mode 100644 index 0000000..318e019 --- /dev/null +++ b/scconfig/src/tmpasm/regression/err_if_else.gasm @@ -0,0 +1,4 @@ +if v +else + print {else} +end diff --git a/scconfig/src/tmpasm/regression/err_if_else.ref b/scconfig/src/tmpasm/regression/err_if_else.ref new file mode 100644 index 0000000..6514b4a --- /dev/null +++ b/scconfig/src/tmpasm/regression/err_if_else.ref @@ -0,0 +1,6 @@ +error: unexpected 'else' - must be in a 'then' block before an else at 2:5 +if *NULL - broken AST* [at 1:1] +then: + (NOP) +else: + (NOP) diff --git a/scconfig/src/tmpasm/regression/err_if_end.gasm b/scconfig/src/tmpasm/regression/err_if_end.gasm new file mode 100644 index 0000000..3acbe6d --- /dev/null +++ b/scconfig/src/tmpasm/regression/err_if_end.gasm @@ -0,0 +1,2 @@ +if v +end diff --git a/scconfig/src/tmpasm/regression/err_if_end.ref b/scconfig/src/tmpasm/regression/err_if_end.ref new file mode 100644 index 0000000..90b5430 --- /dev/null +++ b/scconfig/src/tmpasm/regression/err_if_end.ref @@ -0,0 +1,6 @@ +error: unexpected "end" in "if" - expected "then" at 2:4 +if v [at 2:1] +then: + (NOP) +else: + (NOP) diff --git a/scconfig/src/tmpasm/regression/err_no_end.gasm b/scconfig/src/tmpasm/regression/err_no_end.gasm new file mode 100644 index 0000000..82a1bf0 --- /dev/null +++ 
b/scconfig/src/tmpasm/regression/err_no_end.gasm @@ -0,0 +1,3 @@ +pritn {foo} +switch {cond} + case {1} print {foo}; end diff --git a/scconfig/src/tmpasm/regression/err_no_end.ref b/scconfig/src/tmpasm/regression/err_no_end.ref new file mode 100644 index 0000000..34646b1 --- /dev/null +++ b/scconfig/src/tmpasm/regression/err_no_end.ref @@ -0,0 +1,6 @@ +pritn [at 1:1] + arg: {foo} +switch {cond} [at 2:1] + case {1} + print [at 3:11] + arg: {foo} diff --git a/scconfig/src/tmpasm/regression/err_switch_end.gasm b/scconfig/src/tmpasm/regression/err_switch_end.gasm new file mode 100644 index 0000000..2dffb08 --- /dev/null +++ b/scconfig/src/tmpasm/regression/err_switch_end.gasm @@ -0,0 +1,2 @@ +switch +end diff --git a/scconfig/src/tmpasm/regression/err_switch_end.ref b/scconfig/src/tmpasm/regression/err_switch_end.ref new file mode 100644 index 0000000..7747386 --- /dev/null +++ b/scconfig/src/tmpasm/regression/err_switch_end.ref @@ -0,0 +1,2 @@ +error: unexpected end of if switch statement; expected a data at 1:7 +(NOP) diff --git a/scconfig/src/tmpasm/regression/err_switch_nocond.gasm b/scconfig/src/tmpasm/regression/err_switch_nocond.gasm new file mode 100644 index 0000000..67520eb --- /dev/null +++ b/scconfig/src/tmpasm/regression/err_switch_nocond.gasm @@ -0,0 +1,3 @@ +switch +case data print {foo}; end +end diff --git a/scconfig/src/tmpasm/regression/err_switch_nocond.ref b/scconfig/src/tmpasm/regression/err_switch_nocond.ref new file mode 100644 index 0000000..7747386 --- /dev/null +++ b/scconfig/src/tmpasm/regression/err_switch_nocond.ref @@ -0,0 +1,2 @@ +error: unexpected end of if switch statement; expected a data at 1:7 +(NOP) diff --git a/scconfig/src/tmpasm/regression/foreach.gasm b/scconfig/src/tmpasm/regression/foreach.gasm new file mode 100644 index 0000000..268aeed --- /dev/null +++ b/scconfig/src/tmpasm/regression/foreach.gasm @@ -0,0 +1,18 @@ +put a {1} +put foo {example-FOO} +put bar {example-BAR} +put baz {example-BAZ} +put hah {haha} + +# should 
set n to the string foo, the value of bar and the string baz +# per iteration +foreach n in [~foo ~bar~ baz~] + print {n=} n {\n} + print {a11} [@a12 @hah@ a14@] {\n} + print {a21} {a22 a23} {\n} + print {a31\n} +end +print {a41} {a42} {a43} {\n} +print + + diff --git a/scconfig/src/tmpasm/regression/foreach.ref b/scconfig/src/tmpasm/regression/foreach.ref new file mode 100644 index 0000000..6e6d67c --- /dev/null +++ b/scconfig/src/tmpasm/regression/foreach.ref @@ -0,0 +1,41 @@ +put [at 1:1] + arg: a + arg: {1} +put [at 2:1] + arg: foo + arg: {example-FOO} +put [at 3:1] + arg: bar + arg: {example-BAR} +put [at 4:1] + arg: baz + arg: {example-BAZ} +put [at 5:1] + arg: hah + arg: {haha} +foreach n in [~foo ~bar~ baz~] [at 9:1] + print [at 10:2] + arg: {n=} + arg: n + arg: { +} + print [at 11:2] + arg: {a11} + arg: [~a12 ~hah~ a14~] + arg: { +} + print [at 12:2] + arg: {a21} + arg: {a22 a23} + arg: { +} + print [at 13:2] + arg: {a31 +} +print [at 15:1] + arg: {a41} + arg: {a42} + arg: {a43} + arg: { +} +print [at 16:1] diff --git a/scconfig/src/tmpasm/regression/if.gasm b/scconfig/src/tmpasm/regression/if.gasm new file mode 100644 index 0000000..19f4f47 --- /dev/null +++ b/scconfig/src/tmpasm/regression/if.gasm @@ -0,0 +1,10 @@ +put a 1 +if a then + if b then + print {then-then} + else + print {then-else} + end +else + print {else} +end diff --git a/scconfig/src/tmpasm/regression/if.ref b/scconfig/src/tmpasm/regression/if.ref new file mode 100644 index 0000000..7290a2b --- /dev/null +++ b/scconfig/src/tmpasm/regression/if.ref @@ -0,0 +1,15 @@ +put [at 1:1] + arg: a + arg: 1 +if a [at 2:6] +then: + if b [at 3:7] + then: + print [at 4:3] + arg: {then-then} + else: + print [at 6:3] + arg: {then-else} +else: + print [at 9:2] + arg: {else} diff --git a/scconfig/src/tmpasm/regression/order.gasm b/scconfig/src/tmpasm/regression/order.gasm new file mode 100644 index 0000000..080e24c --- /dev/null +++ b/scconfig/src/tmpasm/regression/order.gasm @@ -0,0 +1,51 @@ +# test if 
without an else +put list {one two three four} + +print list {\n} + +print {\nnothing:\n} + +# do nothing: order matches +order out list {one} {before} {two} +print out {\n} + +order out list {one} {before} {one} +print out {\n} + +order out list {one} {after} {one} +print out {\n} + +order out list {two} {after} {one} +print out {\n} + +# do nothing: not on list matches +order out list {nine} {after} {one} +print out {\n} +order out list {one} {after} {nine} +print out {\n} + +print {\nbefore:\n} +order out list {two} {before} {one} +print out {\n} + +order out list {four} {before} {one} +print out {\n} + +order out list {four} {before} {three} +print out {\n} + +order out list {three} {before} {two} +print out {\n} + +print {\nafter:\n} +order out list {one} {after} {two} +print out {\n} + +order out list {one} {after} {four} +print out {\n} + +order out list {two} {after} {three} +print out {\n} + +order out list {two} {after} {four} +print out {\n} diff --git a/scconfig/src/tmpasm/regression/order.ref b/scconfig/src/tmpasm/regression/order.ref new file mode 100644 index 0000000..4728869 --- /dev/null +++ b/scconfig/src/tmpasm/regression/order.ref @@ -0,0 +1,159 @@ +put [at 2:1] + arg: list + arg: {one two three four} +print [at 4:1] + arg: list + arg: { +} +print [at 6:1] + arg: { +nothing: +} +order [at 9:1] + arg: out + arg: list + arg: {one} + arg: {before} + arg: {two} +print [at 10:1] + arg: out + arg: { +} +order [at 12:1] + arg: out + arg: list + arg: {one} + arg: {before} + arg: {one} +print [at 13:1] + arg: out + arg: { +} +order [at 15:1] + arg: out + arg: list + arg: {one} + arg: {after} + arg: {one} +print [at 16:1] + arg: out + arg: { +} +order [at 18:1] + arg: out + arg: list + arg: {two} + arg: {after} + arg: {one} +print [at 19:1] + arg: out + arg: { +} +order [at 22:1] + arg: out + arg: list + arg: {nine} + arg: {after} + arg: {one} +print [at 23:1] + arg: out + arg: { +} +order [at 24:1] + arg: out + arg: list + arg: {one} + arg: {after} + 
arg: {nine} +print [at 25:1] + arg: out + arg: { +} +print [at 27:1] + arg: { +before: +} +order [at 28:1] + arg: out + arg: list + arg: {two} + arg: {before} + arg: {one} +print [at 29:1] + arg: out + arg: { +} +order [at 31:1] + arg: out + arg: list + arg: {four} + arg: {before} + arg: {one} +print [at 32:1] + arg: out + arg: { +} +order [at 34:1] + arg: out + arg: list + arg: {four} + arg: {before} + arg: {three} +print [at 35:1] + arg: out + arg: { +} +order [at 37:1] + arg: out + arg: list + arg: {three} + arg: {before} + arg: {two} +print [at 38:1] + arg: out + arg: { +} +print [at 40:1] + arg: { +after: +} +order [at 41:1] + arg: out + arg: list + arg: {one} + arg: {after} + arg: {two} +print [at 42:1] + arg: out + arg: { +} +order [at 44:1] + arg: out + arg: list + arg: {one} + arg: {after} + arg: {four} +print [at 45:1] + arg: out + arg: { +} +order [at 47:1] + arg: out + arg: list + arg: {two} + arg: {after} + arg: {three} +print [at 48:1] + arg: out + arg: { +} +order [at 50:1] + arg: out + arg: list + arg: {two} + arg: {after} + arg: {four} +print [at 51:1] + arg: out + arg: { +} diff --git a/scconfig/src/tmpasm/regression/switch.gasm b/scconfig/src/tmpasm/regression/switch.gasm new file mode 100644 index 0000000..75c9d9e --- /dev/null +++ b/scconfig/src/tmpasm/regression/switch.gasm @@ -0,0 +1,8 @@ +put swdata {lol} +switch swdata + case data1 print {1a} {11}; print {1b} 12; print {1c} 13; print {1d} 14; end; + case [~data2 ~a~~] print {2a} 21; print {2b} 22; print {2c} 23; print {2d} 24; end; + default print {3a} 31; print {3b} 32; print {3c} 33; print {3d} 34; end; +end; +print {i1} +print {i2} diff --git a/scconfig/src/tmpasm/regression/switch.ref b/scconfig/src/tmpasm/regression/switch.ref new file mode 100644 index 0000000..db8c6e4 --- /dev/null +++ b/scconfig/src/tmpasm/regression/switch.ref @@ -0,0 +1,47 @@ +put [at 1:1] + arg: swdata + arg: {lol} +switch swdata [at 2:1] + case data1 + print [at 3:24] + arg: {1a} + arg: {11} + print [at 3:41] + 
arg: {1b} + arg: 12 + print [at 3:56] + arg: {1c} + arg: 13 + print [at 3:71] + arg: {1d} + arg: 14 + case [~data2 ~a~~] + print [at 4:24] + arg: {2a} + arg: 21 + print [at 4:41] + arg: {2b} + arg: 22 + print [at 4:56] + arg: {2c} + arg: 23 + print [at 4:71] + arg: {2d} + arg: 24 + default + print [at 5:24] + arg: {3a} + arg: 31 + print [at 5:41] + arg: {3b} + arg: 32 + print [at 5:56] + arg: {3c} + arg: 33 + print [at 5:71] + arg: {3d} + arg: 34 +print [at 7:1] + arg: {i1} +print [at 8:1] + arg: {i2} diff --git a/scconfig/src/tmpasm/regression/test.gasm b/scconfig/src/tmpasm/regression/test.gasm new file mode 100644 index 0000000..72b2738 --- /dev/null +++ b/scconfig/src/tmpasm/regression/test.gasm @@ -0,0 +1,47 @@ +### set up internal variables ### +put /local/cflags {-std=c99 -Wall} +put /local/ldflags {-lm} +put /local/objs {main.o foo.o bar.o} + +# turn off optimization and add -g in debug mode +if /local/debug then + append /local/cflags {-g} +else + append /local/cflags {-O2} +end + +# if somelib is selected, add -I and -l +isempty /local/r /local/somelib +invert /local/r +if /local/r then + append /local/cflags { -I/usr/include/somelib} + append /local/ldflags { -lsomelib} +end + +### Generate the Makefile ### +print [@ +# Makefile generated by scconfig - DO NOT EDIT - please edit Makefile.in +CFLAGS=@/local/cflags@ +LDFLAGS=@/local/ldflags@ +OBJS=@/local/objs@ + +all: main + +main: $(OBJS) + $(CC) $(LDFLAGS) + +@] + +# loop over each object and generate an explicit rule +# (we are generating a dumb Makefile that would work with any +# old version of make) +foreach /local/o in /local/objs + put /local/c /local/o + sub /local/c {.o$} {.c} + print [@ +@/local/o@: @/local/c@ + $(CC) -c $(CFLAGS) @/local/c@ -o @/local/o@ + @] +end + +print {#end\n} diff --git a/scconfig/src/tmpasm/regression/test.ref b/scconfig/src/tmpasm/regression/test.ref new file mode 100644 index 0000000..8cc2e5e --- /dev/null +++ b/scconfig/src/tmpasm/regression/test.ref @@ -0,0 +1,62 @@ 
+put [at 2:1] + arg: /local/cflags + arg: {-std=c99 -Wall} +put [at 3:1] + arg: /local/ldflags + arg: {-lm} +put [at 4:1] + arg: /local/objs + arg: {main.o foo.o bar.o} +if /local/debug [at 7:17] +then: + append [at 8:2] + arg: /local/cflags + arg: {-g} +else: + append [at 10:2] + arg: /local/cflags + arg: {-O2} +isempty [at 14:1] + arg: /local/r + arg: /local/somelib +invert [at 15:1] + arg: /local/r +if /local/r [at 16:13] +then: + append [at 17:2] + arg: /local/cflags + arg: { -I/usr/include/somelib} + append [at 18:2] + arg: /local/ldflags + arg: { -lsomelib} +else: + (NOP) +print [at 22:1] + arg: [~ +# Makefile generated by scconfig - DO NOT EDIT - please edit Makefile.in +CFLAGS=~/local/cflags~ +LDFLAGS=~/local/ldflags~ +OBJS=~/local/objs~ + +all: main + +main: $(OBJS) + $(CC) $(LDFLAGS) + +~] +foreach /local/o in /local/objs [at 38:1] + put [at 39:2] + arg: /local/c + arg: /local/o + sub [at 40:2] + arg: /local/c + arg: {.o$} + arg: {.c} + print [at 41:2] + arg: [~ +~/local/o~: ~/local/c~ + $(CC) -c $(CFLAGS) ~/local/c~ -o ~/local/o~ + ~] +print [at 47:1] + arg: {#end +} diff --git a/scconfig/src/tmpasm/regression/then.gasm b/scconfig/src/tmpasm/regression/then.gasm new file mode 100644 index 0000000..a2e3828 --- /dev/null +++ b/scconfig/src/tmpasm/regression/then.gasm @@ -0,0 +1,6 @@ +# test if without an else +if cnd then + print a1 a2 a3 +end + +print a1 a2 a3 diff --git a/scconfig/src/tmpasm/regression/then.ref b/scconfig/src/tmpasm/regression/then.ref new file mode 100644 index 0000000..680e6c1 --- /dev/null +++ b/scconfig/src/tmpasm/regression/then.ref @@ -0,0 +1,12 @@ +if cnd [at 2:8] +then: + print [at 3:2] + arg: a1 + arg: a2 + arg: a3 +else: + (NOP) +print [at 6:1] + arg: a1 + arg: a2 + arg: a3 diff --git a/scconfig/src/tmpasm/tester.c b/scconfig/src/tmpasm/tester.c new file mode 100644 index 0000000..a7007f3 --- /dev/null +++ b/scconfig/src/tmpasm/tester.c @@ -0,0 +1,64 @@ +#include +#include +#include "tmpasm.h" +#include "tmpasm_scconfig.h" 
+#include "debug.h" +#include "db.h" + +tmpasm_t *ctx; + +void re_fail(char *s, char c) +{ + fprintf(stderr, "Regex error: %s [opcode %o]\n", s, c); + abort(); +} + +static void do_dump() +{ + tmpasm_dump(ctx, stdout); +} + +static void do_exec() +{ + if (ctx->dead) + fprintf(stderr, "Can not execute the script due to the above compilation error.\n"); + else + tmpasm_execute(ctx); +} + +static void scc_init(void) +{ + db_init(); + db_mkdir("/local"); + db_cd("/local"); +} + +int main(int argc, char *argv[]) +{ + scc_init(); + ctx = tmpasm_init(&scc_cb); + scc_tmpasm_parse(ctx, NULL, stdin, stdout); + + if (argc > 1) { + char *cmd; + cmd = argv[1]; + while(*cmd == '-') cmd++; + switch(*cmd) { + case 'd': do_dump(); break; + case 'e': do_exec(); break; + } + } + else + do_dump(); + + if (ctx->runtime_error != 0) { + const char *fmt = tmpasm_runtime_error_fmt(ctx); + fprintf(stderr, "Runtime error at %d:%d: ", ctx->runtime_error_line, ctx->runtime_error_col); + fprintf(stderr, fmt, (ctx->runtime_error_data == NULL ? 
"" : ctx->runtime_error_data)); + fprintf(stderr, "\n"); + } + + tmpasm_uninit(ctx); + db_uninit(); + return 0; +} diff --git a/scconfig/src/tmpasm/tmpasm.c b/scconfig/src/tmpasm/tmpasm.c new file mode 100644 index 0000000..9c868bd --- /dev/null +++ b/scconfig/src/tmpasm/tmpasm.c @@ -0,0 +1,810 @@ +#include +#include +#include +#include +#include "tmpasm.h" +#include "debug.h" + +#define is_space(c) (((c) == ' ') || ((c) == '\t')) +#define is_sep(c) (((c) == '\n') || ((c) == '\r') || ((c) == ';')) +#define is_addr(c) ( (((c) >= '0') && ((c) <= '9')) || (((c) >= 'a') && ((c) <= 'z')) || (((c) >= 'A') && ((c) <= 'Z')) || ((c) == '_') || ((c) == '?') || ((c) == '.') || ((c) == ',') || ((c) == ',') || ((c) == '-') || ((c) == '/') || ((c) == '&') ) + +/* this local copy is to make tmpasm compile independently */ +static char *strclone(const char *str) +{ + int l; + char *ret; + + if (str == NULL) + return NULL; + + l = strlen(str)+1; + ret = malloc(l); + memcpy(ret, str, l); + return ret; +} + + +#define TOP ctx->st + +static const char *kw_names[] = {"-", "if", "then", "else", "end", "foreach", "in", "switch", "case", "default", "nop", NULL }; + +static tmpasm_kw_t kw_lookup(const char *str) +{ + const char **k; + tmpasm_kw_t i; + +/* slow linear search is enough: we have only a few keywords */ + for(k = kw_names, i = KW_none; *k != NULL; k++,i++) + if (strcmp(*k, str) == 0) + return i; + return KW_none; +} + + +tmpasm_exec_t *code_new(tmpasm_kw_t kw) +{ + tmpasm_exec_t *c; + c = calloc(sizeof(tmpasm_exec_t), 1); + c->kw = kw; + return c; +} + +/*tmpasm_exec_t *code_end(tmpasm_exec_t *start) +{ + while(start->next != NULL) + start = start->next; + return start; +}*/ + +tmpasm_exec_t *code_append(tmpasm_t *ctx, tmpasm_kw_t kw) +{ + tmpasm_exec_t *c; +/* c = code_end(TOP->code);*/ + c = TOP->last_code; + if (TOP->last_code->kw != KW_NOP) { + c->next = code_new(kw); + return c->next; + } + c->kw = kw; + return c; +} + +static void error(tmpasm_t *ctx, char c, char *msg) 
+{ + fprintf(stderr, "error: %s at %d:%d\n", msg, ctx->line, ctx->col); + if (c != 0) + fprintf(stderr, " character last seen: %c\n", c); + ctx->dead = 1; +} + +static void push(tmpasm_t *ctx, tmpasm_kw_t kw, tmpasm_state_t st, tmpasm_exec_t *code) +{ + tmpasm_stack_t *new; + new = calloc(sizeof(tmpasm_stack_t), 1); + new->kw = kw; + new->state = st; + new->next = ctx->st; + new->last_code = code; + ctx->st = new; +} + +static void pop_(tmpasm_t *ctx, int chk_underfl) +{ + tmpasm_stack_t *old; + old = ctx->st; + ctx->st = old->next; + + /* stack underflow? */ + if (chk_underfl) { + if (TOP == NULL) { + error(ctx, 0, "Excess \"end\""); + TOP = old; + return; + } + } + if (old->argv != NULL) + free(old->argv); + if (old->argend != NULL) + free(old->argend); + if (old->arg_used != NULL) + free(old->arg_used); + if (old->arg_alloced != NULL) + free(old->arg_alloced); + free(old); +} + +static void pop(tmpasm_t *ctx) +{ + pop_(ctx, 1); +} + + +#define grow(arr, size) arr = realloc(arr, sizeof((arr)[0]) * size) + +static void arg_new(tmpasm_t *ctx, int is_addr) +{ + if (TOP->args_used >= TOP->args_alloced) { + TOP->args_alloced = TOP->args_alloced + 16; + grow(TOP->argv, TOP->args_alloced); + grow(TOP->argend, TOP->args_alloced); + grow(TOP->arg_alloced, TOP->args_alloced); + grow(TOP->arg_used, TOP->args_alloced); + } + + TOP->arg_alloced[TOP->args_used] = 64; + TOP->arg_used[TOP->args_used] = 0; + + TOP->argv[TOP->args_used] = malloc(TOP->arg_alloced[TOP->args_used]+sizeof(tmpasm_arg_t)); + TOP->argv[TOP->args_used]->is_addr = is_addr; + TOP->argv[TOP->args_used]->next = NULL; + TOP->argend[TOP->args_used] = TOP->argv[TOP->args_used]; + + TOP->args_used++; +} + + +static void arg_append(tmpasm_t *ctx, char c) +{ + int i = TOP->args_used - 1; + + if (TOP->arg_used[i] >= TOP->arg_alloced[i]) { + tmpasm_arg_t *prev, *last; + + /* since argend[i] is also in the ->next pointer of the previous item in a block chain, we need to look it up */ + for(prev = NULL, last = 
TOP->argv[i]; last->next != NULL; last = last->next) + prev = last; + + TOP->arg_alloced[i] += 64; + last = realloc(last, TOP->arg_alloced[i]+sizeof(tmpasm_arg_t)); + + if (prev == NULL) + TOP->argv[i] = last; + else + prev->next = last; + + TOP->argend[i] = last; + } + TOP->argend[i]->data[TOP->arg_used[i]] = c; + TOP->arg_used[i]++; +} + +static void arg_free(tmpasm_arg_t *a) +{ + tmpasm_arg_t *next; + if (a == NULL) + return; + next = a->next; + free(a); + if (next != NULL) + arg_free(next); +} + +static void arg_new_next(tmpasm_t *ctx, int is_addr) +{ + tmpasm_arg_t *a; + int id; + + arg_append(ctx, '\0'); + + id = TOP->args_used - 1; + assert(id>=0); + TOP->arg_alloced[id] = 64; + TOP->arg_used[id] = 0; + + a = malloc(TOP->arg_alloced[id]+sizeof(tmpasm_arg_t)); + strcpy(a->data, "QWERT"); + a->is_addr = is_addr; + a->next = NULL; + TOP->argend[id]->next = a; + TOP->argend[id] = a; +} + +static void arg_remove(tmpasm_t *ctx) +{ + assert(TOP->args_used == 1); + TOP->args_used = 0; + TOP->argv[0] = NULL; + TOP->argend[0] = NULL; + TOP->arg_alloced[0] = 0; + TOP->arg_used[0] = 0; +} + +static int arg_is_addr(tmpasm_arg_t *a) +{ + return (a->next == NULL) && (a->is_addr); +} + +static void arg_end(tmpasm_t *ctx, int cmd_ctx) +{ + tmpasm_arg_t *a; + arg_append(ctx, '\0'); + + a = TOP->argv[TOP->args_used-1]; + if (cmd_ctx) { + /* when argument ends in a command context (not in a block inline), we + may may need to switch back to command mode; example: after + the cond of an "if cond then"*/ + switch(TOP->kw) { + case KW_IF: + TOP->state = ST_PRECMD; + break; + case KW_FOREACH: + if (!arg_is_addr(a)) { + error(ctx, 0, "variable of a foreach must be an address"); + return; + } + TOP->last_code->payload.fc_foreach.loop_var = strclone(a->data); + arg_free(a); + arg_remove(ctx); + TOP->state = ST_PRECMD; + break; + case KW_IN: + /* pop will free the argv[] array, but not the elements so "a" is safe to use after this line */ + pop(ctx); + /* in foreach context, after the 
IN-data */ + TOP->last_code->payload.fc_foreach.data = a; + + /* we are in the body now, TOP is the foreach context, last_code is body */ + TOP->last_code->payload.fc_foreach.code_body = code_new(KW_NOP); + push(ctx, KW_none, ST_PRECMD, TOP->last_code->payload.fc_foreach.code_body); + break; + case KW_CASE: + ctx->st->next->last_code->payload.fc_switch.last->data = a; + arg_remove(ctx); + push(ctx, KW_none, ST_PRECMD, TOP->last_code); + break; + case KW_SWITCH: + TOP->last_code->payload.fc_switch.cond = a; + arg_remove(ctx); + TOP->state = ST_PRECMD; + break; + default: + TOP->state = ST_PREDATA; + } + } +} + + + +/* end of statement; update kw state for a composite control kw; for the rest + just call the lib */ +static void end_of_statement(tmpasm_t *ctx) +{ + switch(TOP->kw) { + case KW_none: + case KW_THEN: + case KW_ELSE: + case KW_CASE: + case KW_DEFAULT: + TOP->last_code->payload.instr.argc = TOP->args_used; + TOP->last_code->payload.instr.argv = TOP->argv; + TOP->argv = NULL; + free(TOP->argend); + TOP->argend = NULL; + TOP->args_used = 0; + TOP->args_alloced = 0; + break; + default: + /* don't mess with the payload */ + ; + } + TOP->state = ST_PRECMD; +} + +#define loc_update() \ + do { \ + TOP->last_code->line = TOP->kwline; \ + TOP->last_code->col = TOP->kwcol; \ + } while(0) + +static void got_kw(tmpasm_t *ctx, tmpasm_kw_t kw, int terminated) +{ + switch(kw) { + case KW_END: + /* then-else threads have their own subcontext within the if subcontext; end needs to pop the innermost subcontext before terminating the if context */ + if (TOP->kw == KW_IF) { + error(ctx, 0, "unexpected \"end\" in \"if\" - expected \"then\""); + goto bind_if_cond; + } + if ((TOP->kw == KW_ELSE) || (TOP->kw == KW_THEN)) + pop(ctx); + + + if (TOP->kw == KW_SWITCH) + TOP->kw = TOP->old_kw; + else { + pop(ctx); + + if ((TOP->kw == KW_CASE) || (TOP->kw == KW_DEFAULT)) + pop(ctx); + + } + TOP->state = ST_PRECMD; + + /* have to restore context keyword after these */ + if (TOP->kw == 
KW_FOREACH) + TOP->kw = TOP->old_kw; + + break; + case KW_IF: + if (terminated) { + error(ctx, 0, "unexpected end of if statement; expected a condition"); + return; + } + TOP->last_code = code_append(ctx, KW_IF); + TOP->last_code->payload.fc_if.code_then = code_new(KW_NOP); + TOP->last_code->payload.fc_if.code_else = code_new(KW_NOP); + loc_update(); + TOP->state = ST_PRECMD; + /* prepare for reading a condition */ + push(ctx, KW_IF, ST_PREDATA, TOP->last_code); + break; + case KW_THEN: + /* we are in an if context, right after reading a condition */ + if (TOP->kw != KW_IF) { + error(ctx, 0, "unexpected 'then' - must be in an 'if' after the condition"); + return; + } + bind_if_cond:; + TOP->last_code->payload.fc_if.cond = TOP->argv[0]; + loc_update(); + arg_remove(ctx); + push(ctx, KW_THEN, ST_PRECMD, TOP->last_code->payload.fc_if.code_then); + break; + case KW_ELSE: + /* we are in an if context, after and end */ + if (TOP->kw != KW_THEN) { + error(ctx, 0, "unexpected 'else' - must be in a 'then' block before an else"); + return; + } + pop(ctx); /* that was the then branch */ + push(ctx, KW_ELSE, ST_PRECMD, TOP->last_code->payload.fc_if.code_else); + break; + case KW_FOREACH: + if (terminated) { + error(ctx, 0, "unexpected end of if foreach statement; expected an address"); + return; + } + TOP->last_code = code_append(ctx, KW_FOREACH); + loc_update(); + TOP->state = ST_PREDATA; + TOP->old_kw = TOP->kw; + TOP->kw = KW_FOREACH; + break; + case KW_IN: + if (TOP->kw != KW_FOREACH) + error(ctx, 0, "unexpected \"in\"; should be after the address in foreach"); + else + push(ctx, KW_IN, ST_PREDATA, NULL); + break; + case KW_SWITCH: + if (terminated) { + error(ctx, 0, "unexpected end of if switch statement; expected a data"); + return; + } + TOP->last_code = code_append(ctx, KW_SWITCH); + TOP->state = ST_PREDATA; + TOP->old_kw = TOP->kw; + TOP->kw = KW_SWITCH; + loc_update(); + break; + case KW_CASE: + case KW_DEFAULT: + if (TOP->kw == KW_SWITCH) { + tmpasm_case_t *c; + c = 
malloc(sizeof(tmpasm_case_t)); + c->body = code_new(KW_NOP); + c->data = NULL; + c->next = NULL; + if (TOP->last_code->payload.fc_switch.last == NULL) { + TOP->last_code->payload.fc_switch.first = c; + TOP->last_code->payload.fc_switch.last = c; + } + else { + TOP->last_code->payload.fc_switch.last->next = c; + TOP->last_code->payload.fc_switch.last = c; + } + if (kw == KW_DEFAULT) { + push(ctx, KW_DEFAULT, ST_PRECMD, c->body); + push(ctx, KW_none, ST_PRECMD, c->body); + c->data = NULL; + } + else + push(ctx, KW_CASE, ST_PREDATA, c->body); + } + else + error(ctx, 0, "unexpected \"case\" or \"default\"; should be in a switch (is the last case terminated by an \"end\"?)"); + break; + default: + TOP->last_code = code_append(ctx, KW_none); + TOP->last_code->payload.instr.call_name = strclone(TOP->cmd_buff); + if (TOP->last_code->payload.instr.call_name != NULL) { + TOP->last_code->payload.instr.call = ctx->cb->resolve(ctx, TOP->last_code->payload.instr.call_name); + loc_update(); + } + if (terminated) + TOP->state = ST_PRECMD; + else + TOP->state = ST_PREDATA; + } +} + +static void comment_start(tmpasm_t *ctx) +{ + push(ctx, KW_none, ST_COMMENT, NULL); +} + +int tmpasm_gotchar(tmpasm_t *ctx, char c) +{ + if (ctx->dead) + return -1; + switch(TOP->state) { + case ST_COMMENT: + if ((c == '\n') || (c == '\r')) { + pop(ctx); + if (TOP->state == ST_PREDATA) + end_of_statement(ctx); + } + break; + case ST_PRECMD: + if (c == '#') { + comment_start(ctx); + break; + } + if (is_space(c) || is_sep(c)) + break; + TOP->cmdi = 0; + TOP->state = ST_CMD; + TOP->kwline = ctx->line; + TOP->kwcol = ctx->col; + /* fall thru */ + case ST_CMD: + /* end of command or keyword */ + if (is_space(c) || is_sep(c)) { + TOP->cmd_buff[TOP->cmdi] = '\0'; + got_kw(ctx, kw_lookup(TOP->cmd_buff), is_sep(c)); + } + else { + TOP->cmd_buff[TOP->cmdi] = c; + TOP->cmdi++; + if (TOP->cmdi >= sizeof(TOP->cmd_buff)) + error(ctx, 0, "keyword or instruction name is too long"); + } + break; + case ST_PREDATA: + if 
(c == '#') { + comment_start(ctx); + break; + } + if (is_space(c)) + break; + if (is_sep(c)) + end_of_statement(ctx); + else if (c == '{') { + TOP->state = ST_STRING; + arg_new(ctx, 0); + } + else if (c == '[') { + TOP->state = ST_PREBLOCKSEP; + arg_new(ctx, 0); + } + else if (is_addr(c)) { + TOP->state = ST_ADDRESS; + arg_new(ctx, 1); + arg_append(ctx, c); + } + else + error(ctx, c, "unexpected character; expected '{' for starting a string or an address"); + break; + case ST_PREBLOCKSEP: + TOP->block_sep = c; + TOP->state = ST_BLOCK; + break; + case ST_BLOCK: + if (c == TOP->block_sep) + TOP->state = ST_BLOCKSEP; + else + arg_append(ctx, c); + break; + case ST_BLOCKSEP: + if (c != ']') { + arg_new_next(ctx, 1); + arg_append(ctx, c); + TOP->state = ST_BLOCK_INLINE; + } + else + arg_end(ctx, 1); + break; + case ST_BLOCK_INLINE: + if (c == TOP->block_sep) { + arg_new_next(ctx, 0); + TOP->state = ST_BLOCK; + } + else + arg_append(ctx, c); + break; + case ST_STRING: + if (c == '}') + arg_end(ctx, 1); + else if (c == '\\') + TOP->state = ST_STRING_ESCAPE; + else + arg_append(ctx, c); + break; + case ST_STRING_ESCAPE: + { + char co; + switch(c) { + case 'n': co = '\n'; break; + case 'r': co = '\r'; break; + case 't': co = '\t'; break; + case '\\': co = '\\'; break; + case 'o': co = '{'; break; + case 'c': co = '}'; break; + default: co = c; + } + arg_append(ctx, co); + TOP->state = ST_STRING; + } + break; + case ST_ADDRESS: + if (is_space(c)) + arg_end(ctx, 1); + else if (is_sep(c)) { + arg_end(ctx, 1); + end_of_statement(ctx); + } + else if (is_addr(c)) + arg_append(ctx, c); + else + error(ctx, c, "unexpected character; expected next character of the address"); + break; + } + if (c == '\n') { + ctx->line++; + ctx->col = 1; + } + else + ctx->col++; + return 0; +} + +tmpasm_t *tmpasm_init(const tmpasm_cb_t *cb) +{ + tmpasm_t *ctx; + ctx = calloc(sizeof(tmpasm_t), 1); + ctx->line = 1; + ctx->col = 1; + ctx->code = code_new(KW_NOP); + ctx->cb = cb; + push(ctx, KW_none, 
ST_PRECMD, ctx->code); + return ctx; +} + +static void free_exec(tmpasm_exec_t *e) +{ + int n; + tmpasm_case_t *c, *c_next; + tmpasm_exec_t *e_next; + + for(; e != NULL; e = e_next) { + e_next = e->next; + switch(e->kw) { + case KW_none: + if (e->payload.instr.call_name != NULL) + free(e->payload.instr.call_name); + for(n = 0; n < e->payload.instr.argc; n++) + arg_free(e->payload.instr.argv[n]); + free(e->payload.instr.argv); + break; + case KW_IF: + arg_free(e->payload.fc_if.cond); + free_exec(e->payload.fc_if.code_then); + free_exec(e->payload.fc_if.code_else); + break; + case KW_FOREACH: + free(e->payload.fc_foreach.loop_var); + arg_free(e->payload.fc_foreach.data); + free_exec(e->payload.fc_foreach.code_body); + break; + case KW_SWITCH: + arg_free(e->payload.fc_switch.cond); + for(c = e->payload.fc_switch.first; c != NULL; c = c_next) { + c_next = c->next; + if (c->data != NULL) + arg_free(c->data); + free_exec(c->body); + free(c); + } + break; + default:; + } + free(e); + } +} + +void tmpasm_uninit(tmpasm_t *ctx) +{ + free_exec(ctx->code); + while (ctx->st != NULL) + pop_(ctx, 0); + if (ctx->runtime_error_data != NULL) + free(ctx->runtime_error_data); + free(ctx); +} + +/****************** runtime ********************/ + +static const char *tmpasm_runtime_error_fmts[] = { + "success %s", + "variable '%s' does not exist", + "empty argument (broken AST)%s", + "compilation error: control block without an \"end\"; premature end of script%s", + "attempt to call unresolved instruction '%s'", + NULL +}; + +void tmpasm_runtime_error(tmpasm_t *ctx, int code, const char *data) +{ + ctx->runtime_error = code; + if (ctx->runtime_error_data != NULL) + free(ctx->runtime_error_data); + ctx->runtime_error_data = strclone(data); + if (ctx->executing != NULL) { + ctx->runtime_error_line = ctx->executing->line; + ctx->runtime_error_col = ctx->executing->col; + } + else { + ctx->runtime_error_line = 0; + ctx->runtime_error_col = 0; + } +} + +const char 
*tmpasm_runtime_error_fmt(tmpasm_t *ctx) +{ + if (ctx->runtime_error == 0) + return NULL; + if ((ctx->runtime_error < 0) && (ctx->cb->runtime_error_fmt != NULL)) { + const char *fmt; + fmt = ctx->cb->runtime_error_fmt(ctx); + if (fmt != NULL) + return fmt; + } + if ((ctx->runtime_error < 0) || ((size_t)ctx->runtime_error > (sizeof(tmpasm_runtime_error_fmts)/sizeof(char *)))) + return "invalid error code %s"; + return tmpasm_runtime_error_fmts[ctx->runtime_error]; +} + +char *tmpasm_arg2str(tmpasm_t *ctx, tmpasm_arg_t *a, int keep_addr) +{ + if (a == NULL) { + tmpasm_runtime_error(ctx, 2, NULL); + return strclone(""); + } + if (a->next != NULL) { + /* block mode */ + int alloced = 0, used = 0; + char *s = NULL; + const char *i; + + for(;a != NULL; a = a->next) { + int l; + if (a->is_addr) { + i = ctx->cb->get(ctx, a->data); + if (i == NULL) { + i = ""; + tmpasm_runtime_error(ctx, 1, strclone(a->data)); + } + } + else + i = a->data; + l = strlen(i); + if (used + l >= alloced) { + alloced = used + l + 256; + s = realloc(s, alloced); + } + memcpy(s+used, i, l); + used += l; + } + s[used] = '\0'; + return s; + } + + /* non-block */ + if (a->is_addr) { + const char *i; + if (keep_addr) + i = a->data; + else + i = ctx->cb->get(ctx, a->data); + if (i == NULL) { + i = ""; + tmpasm_runtime_error(ctx, 1, strclone(a->data)); + } + return strclone(i); + } + + return strclone(a->data); +} + +static void execute(tmpasm_t *ctx, tmpasm_exec_t *e) +{ + tmpasm_case_t *c; + void *state; + char *cond, *list; + const char *i; + + while((e != NULL) && (ctx->runtime_error == 0) && (ctx->halt == 0)) { + ctx->executing = e; + switch(e->kw) { + case KW_none: + if (e->payload.instr.call != NULL) + e->payload.instr.call(ctx, e->payload.instr.call_name, e->payload.instr.argc, e->payload.instr.argv); + else + tmpasm_runtime_error(ctx, 4, e->payload.instr.call_name); + break; + case KW_IF: + cond = tmpasm_arg2str(ctx, e->payload.fc_if.cond, 0); + if (ctx->cb->is_true(ctx, cond)) + execute(ctx, 
e->payload.fc_if.code_then); + else + execute(ctx, e->payload.fc_if.code_else); + free(cond); + break; + case KW_FOREACH: + list = tmpasm_arg2str(ctx, e->payload.fc_foreach.data, 0); + for(i = ctx->cb->first(ctx, &state, list); i != NULL; i = ctx->cb->next(ctx, &state)) { + ctx->cb->set(ctx, e->payload.fc_foreach.loop_var, i); + execute(ctx, e->payload.fc_foreach.code_body); + } + free(list); + break; + case KW_SWITCH: + cond = tmpasm_arg2str(ctx, e->payload.fc_switch.cond, 0); + for(c = e->payload.fc_switch.first; c != NULL; c = c->next) { + char *cv = NULL; + if (c->data != NULL) + cv = tmpasm_arg2str(ctx, c->data, 0); + if ((c->data == NULL) || (ctx->cb->match(ctx, cond, cv))) { + execute(ctx, c->body); + if (cv != NULL) + free(cv); + break; + } + if (cv != NULL) + free(cv); + } + free(cond); + break; + default:; + } + e = e->next; + } +} + +void tmpasm_execute(tmpasm_t *ctx) +{ + if (TOP->next != NULL) { + ctx->executing = TOP->next->last_code; + tmpasm_runtime_error(ctx, 3, NULL); + return; + } + if ((TOP->state != ST_PRECMD) || (TOP->kw != KW_none)) { + ctx->executing = TOP->last_code; + tmpasm_runtime_error(ctx, 3, NULL); + return; + } + ctx->halt = 0; + ctx->runtime_error = 0; + if (ctx->runtime_error_data != NULL) { + free(ctx->runtime_error_data); + ctx->runtime_error_data = NULL; + } + if (ctx->cb->preexec != NULL) + ctx->cb->preexec(ctx); + execute(ctx, ctx->code); + if (ctx->cb->postexec != NULL) + ctx->cb->postexec(ctx); +} + diff --git a/scconfig/src/tmpasm/tmpasm.h b/scconfig/src/tmpasm/tmpasm.h new file mode 100644 index 0000000..5880050 --- /dev/null +++ b/scconfig/src/tmpasm/tmpasm.h @@ -0,0 +1,182 @@ +#ifndef TMPASM_H +#define TMPASM_H +#ifndef TMPASM_INSTR_MAXLEN +#define TMPASM_INSTR_MAXLEN 32 +#endif + +typedef struct tmpasm_s tmpasm_t; + +typedef struct tmpasm_arg_s tmpasm_arg_t; + +struct tmpasm_arg_s { + tmpasm_arg_t *next; /* block: the resulting string is a list of strings and addresses */ + char is_addr; /* 1: arg is a node address; 0: 
arg is a string immediate */ + char data[1]; /* arg string - obviously longer than 1 char (but there's no special hack for that in C89), \0 terminated */ +}; + +/* user specified instruction prototype */ +typedef void tmpasm_instr(tmpasm_t *ctx, char *iname, int argc, tmpasm_arg_t *argv[]); + + +typedef struct tmpasm_cb_s { + /* return the value of a node at addr - NULL is an error */ + const char *(*get)(tmpasm_t *ctx, const char *addr); + + /* set the value of a node at addr to data; data may be NULL */ + void (*set)(tmpasm_t *ctx, const char *addr, const char *data); + + /* return 1 if data is true, 0 otherwise; data may be NULL (if an unknown variable is referenced) */ + int (*is_true)(tmpasm_t *ctx, const char *data); + + /* return 1 if str matches pat, 0 otherwise; str and pat may be NULL */ + int (*match)(tmpasm_t *ctx, const char *str, const char *pat); + + /* first iteration over list; return the first element (or NULL to end); the string returned is not free'd by the caller */ + const char *(*first)(tmpasm_t *ctx, void **state, char *list); + + /* return next element of a list or NULL on end (in which case state shall be also free'd by the caller); the string returned is not free'd by the caller */ + const char *(*next)(tmpasm_t *ctx, void **state); + + /* resolve an instruction name to a function pointer */ + tmpasm_instr *(*resolve)(tmpasm_t *ctx, const char *name); + + /* optional: called once before execution of a context starts */ + void (*preexec)(tmpasm_t *ctx); + + /* optional: called once after execution of a context finishes */ + void (*postexec)(tmpasm_t *ctx); + + /* optional: resolve the current runtime error, called only for negative + error codes; should return a format string with exactly one %s in it + or NULL. 
*/ + const char *(*runtime_error_fmt)(tmpasm_t *ctx); +} tmpasm_cb_t; + +int tmpasm_gotchar(tmpasm_t *ctx, char c); + +tmpasm_t *tmpasm_init(const tmpasm_cb_t *cb); +void tmpasm_uninit(tmpasm_t *ctx); + +/* return the string version of an arg in a newly malloc()'d string + if keep_addr is non-zero and a is a single address, no get() is run + but the address is returned as a string */ +char *tmpasm_arg2str(tmpasm_t *ctx, tmpasm_arg_t *a, int keep_addr); + +/* execute the code recursively until it exits */ +void tmpasm_execute(tmpasm_t *ctx); + +/* Set or get the runtime error of a context. 0 means no error, negative + codes are user errors handled by the runtime_error_fmt() callback + and positive codes are internal error. */ +void tmpasm_runtime_error(tmpasm_t *ctx, int code, const char *data); +const char *tmpasm_runtime_error_fmt(tmpasm_t *ctx); + +/* --- internals: not required for normal use --- */ +typedef enum { + ST_PRECMD, /* waiting for a command to start - ignore whitespace */ + ST_CMD, + ST_PREDATA, /* waiting for data */ + ST_PREBLOCKSEP, /* waiting for a block sep when opening a block */ + ST_BLOCKSEP, /* found a block sep within the block - either an address or a termination follows */ + ST_BLOCK, /* in [@ @] block, text part */ + ST_BLOCK_INLINE, /* in [@ @] block, within inline @@ part */ + ST_STRING, /* in {} string */ + ST_STRING_ESCAPE, /* in {} string, right after a \ */ + ST_ADDRESS, /* shifting address bytes */ + ST_COMMENT /* after #, until the next newline */ +} tmpasm_state_t; + +typedef enum { + KW_none, + KW_IF, + KW_THEN, + KW_ELSE, + KW_END, + KW_FOREACH, + KW_IN, + KW_SWITCH, + KW_CASE, + KW_DEFAULT, + + KW_NOP /* virtual instruction */ +} tmpasm_kw_t; + +/* execution structs */ +typedef struct tmpasm_exec_s tmpasm_exec_t; +typedef struct tmpasm_case_s tmpasm_case_t; + +struct tmpasm_case_s { + tmpasm_arg_t *data; + tmpasm_exec_t *body; + tmpasm_case_t *next; +}; + + +struct tmpasm_exec_s { + tmpasm_kw_t kw; /* kw_none means a hook 
instruction */ + union { + struct { /* normal instruction */ + tmpasm_instr *call; + char *call_name; /* temporary */ + int argc; + tmpasm_arg_t **argv; + } instr; + struct { + tmpasm_arg_t *cond; + tmpasm_exec_t *code_then; + tmpasm_exec_t *code_else; + } fc_if; + struct { + char *loop_var; /* must be a single address */ + tmpasm_arg_t *data; /* what to loop in */ + tmpasm_exec_t *code_body; + } fc_foreach; + struct { + tmpasm_arg_t *cond; + tmpasm_case_t *first; + tmpasm_case_t *last; + } fc_switch; + } payload; + int line, col; + tmpasm_exec_t *next; +}; + + +/* parser structs */ +typedef struct stack_s tmpasm_stack_t; +struct stack_s { + tmpasm_state_t state; +/* tmpasm_state_t kwstate; internal states of composite keywords like switch */ + char cmd_buff[TMPASM_INSTR_MAXLEN+1]; + unsigned int cmdi; + tmpasm_kw_t kw, old_kw; + char block_sep; + int kwcol, kwline; + + int args_used, args_alloced; /* number of arguments in argv[] */ + + tmpasm_arg_t **argv; /* an array of linked lists */ + tmpasm_arg_t **argend; /* each argv[] is a linked list (for blocks); argend points to the tail */ + int *arg_alloced; /* of argend */ + int *arg_used; /* of argend */ + + tmpasm_exec_t *last_code; /* tail of the code list */ + + tmpasm_stack_t *next; +}; + +struct tmpasm_s { + tmpasm_stack_t *st; + int dead; + int col, line; + tmpasm_exec_t *code; + tmpasm_exec_t *executing; /* points to the code most recently executed (or being executed when in callbacks) */ + const tmpasm_cb_t *cb; + int halt; + int runtime_error; + char *runtime_error_data; + int runtime_error_line; + int runtime_error_col; + void *user_data; +}; +#endif diff --git a/scconfig/src/tmpasm/tmpasm_scconfig.c b/scconfig/src/tmpasm/tmpasm_scconfig.c new file mode 100644 index 0000000..3f8145c --- /dev/null +++ b/scconfig/src/tmpasm/tmpasm_scconfig.c @@ -0,0 +1,634 @@ +#include +#include +#include +#include "tmpasm.h" +#include "db.h" +#include "regex.h" +#include "openfiles.h" +#include "libs.h" +#include 
"tmpasm_scconfig.h" +#include "log.h" +#include "regex.h" + +#ifndef TMPASM_PATH +#define TMPASM_PATH "/tmpasm" +#endif + +#ifndef IFS_PATH +#define IFS_PATH TMPASM_PATH "/IFS" +#endif + +#ifndef IFS_DEFAULT +#define IFS_DEFAULT " \t\r\n" +#endif + +#ifndef OFS_PATH +#define OFS_PATH TMPASM_PATH "/OFS" +#endif + +#ifndef OFS_DEFAULT +#define OFS_DEFAULT "\n" +#endif + +typedef struct scc_s { + openfiles_t ofl; + FILE *fout, *default_fout; + const char *cwd; +} scc_t; + +static const char *scc_runtime_error_fmts[] = { + /* -0 */ "success scc %s", + /* -1 */ "\"put\" requires exactly two arguments (got %s)", + /* -2 */ "not enough arguments for sub; should be \"sub node pattern str\"%s", + /* -3 */ "regex syntax error: %s", + /* -4 */ "not enough arguments for uniq; should be \"uniq destnode\" or \"uniq destnode src\"%s", + /* -5 */ "redir: too many arguments%s", + /* -6 */ "redir: can't open %s", + /* -7 */ "exiting due to a previous runtime error occurred in included file %s", + /* -8 */ "can't include '%s': can't open file", + /* -9 */ "\"put\" requires two or three arguments (got %s)", + /* -10 */ "\"order\" requires 4 or 5 arguments: \"order destnode word before|after word\" or \"order destnode src word before|after word\"", + /* -11 */ "\"uniq\" got too many grouping regular expressions", + NULL +}; + +static int print_runtime_error(tmpasm_t *ctx, const char *ifn) +{ + if (ctx->runtime_error != 0) { + const char *fmt = tmpasm_runtime_error_fmt(ctx); + fprintf(stderr, "Runtime error at %s %d:%d: ", ifn, ctx->runtime_error_line, ctx->runtime_error_col); + fprintf(stderr, fmt, (ctx->runtime_error_data == NULL ? 
"" : ctx->runtime_error_data)); + fprintf(stderr, "\n"); + return -1; + } + return 0; +} + +/* allocate and build a full path using ud->cwd and fn */ +static char *scc_path(scc_t *ud, const char *fn) +{ + if (ud->cwd == NULL) + return strclone(fn); + return str_concat("", ud->cwd, "/", fn, NULL); +} + +/******** db binding ********/ + +static const char *scc_get(tmpasm_t *ctx, const char *addr) +{ + (void) ctx; /* not used */ + + if (*addr == '&') { /* return whether exists */ + if (get(addr+1) != NULL) + return strue; + return sfalse; + } + + if (*addr == '?') { /* safe get: return "" instead of NULL to avoid runtime error */ + const char *res = get(addr+1); + if (res == NULL) + return ""; + return res; + } + return get(addr); +} + +static void scc_set(tmpasm_t *ctx, const char *addr, const char *data) +{ + (void) ctx; /* not used */ + put(addr, data); +} + +static int scc_is_true(tmpasm_t *ctx, const char *data) +{ + (void) ctx; /* not used */ + return ((strcmp(data, "1") == 0) || istrue(data)); +} + +static int scc_match(tmpasm_t *ctx, const char *str, const char *pat) +{ + (void) ctx; /* not used */ + re_comp(pat); + return re_exec(str); +} + +static const char *scc_ifs(tmpasm_t *ctx) +{ + const char *ifs = get(IFS_PATH); + (void) ctx; /* not used */ + if (ifs == NULL) + return IFS_DEFAULT; + return ifs; +} + +static const char *scc_ofs(tmpasm_t *ctx) +{ + const char *ofs = get(OFS_PATH); + (void) ctx; /* not used */ + if (ofs == NULL) + return OFS_DEFAULT; + return ofs; +} + +static const char *scc_next(tmpasm_t *ctx, void **state) +{ + char **s = (char **)state; + char *start; + const char *IFS; + + IFS = scc_ifs(ctx); + + /* strip leading whitespace */ + while(chr_inset(**s, IFS)) (*s)++; + + /* at the end of the string, no more tokens */ + if (**s == '\0') + return NULL; + + start = *s; + + /* skip non-whitespace */ + while(!(chr_inset(**s, IFS)) && (**s != '\0')) (*s)++; + + if (**s != '\0') { + **s = '\0'; + (*s)++; + } + return start; +} + + +static 
const char *scc_first(tmpasm_t *ctx, void **state, char *list) +{ + *state = list; + + return scc_next(ctx, state); +} + + +/******** instructions ********/ +static void instr_put(tmpasm_t *ctx, char *iname, int argc, tmpasm_arg_t *argv[]) +{ + char *addr, *val; + (void) iname; /* not used */ + if (argc != 2) { + char str[16]; + sprintf(str, "%d", argc); + tmpasm_runtime_error(ctx, -1, str); + return; + } + addr = tmpasm_arg2str(ctx, argv[0], 1); + val = tmpasm_arg2str(ctx, argv[1], 0); + if (*addr != '\0') + put(addr, val); + free(addr); + free(val); +} + + +static void instr_append(tmpasm_t *ctx, char *iname, int argc, tmpasm_arg_t *argv[]) +{ + char *addr, *val; + char *sep; + (void) iname; /* not used */ + + if ((argc < 2) || (argc > 3)) { + char str[16]; + sprintf(str, "%d", argc); + tmpasm_runtime_error(ctx, -9, str); + return; + } + addr = tmpasm_arg2str(ctx, argv[0], 1); + val = tmpasm_arg2str(ctx, argv[1], 0); + if (argc >= 3) + sep = tmpasm_arg2str(ctx, argv[2], 0); + else + sep = strclone(scc_ofs(ctx)); + if (*addr != '\0') { + append(addr, sep); + append(addr, val); + } + free(addr); + free(val); + free(sep); +} + +static void instr_report(tmpasm_t *ctx, char *iname, int argc, tmpasm_arg_t *argv[]) +{ + int n; + (void) iname; /* not used */ + for(n = 0; n < argc; n++) { + char *val; + val = tmpasm_arg2str(ctx, argv[n], 0); + report("%s", val); + free(val); + } +} + +static void instr_abort(tmpasm_t *ctx, char *iname, int argc, tmpasm_arg_t *argv[]) +{ + scc_t *ud = (scc_t *)ctx->user_data; + (void) iname; /* not used */ + (void) argc; /* not used */ + (void) argv; /* not used */ + report("Abort requested by template.\n"); + if (ud->fout) fflush(ud->fout); + fflush(stdout); + fflush(stderr); + abort(); +} + +static void instr_halt(tmpasm_t *ctx, char *iname, int argc, tmpasm_arg_t *argv[]) +{ + (void) iname; /* not used */ + (void) argc; /* not used */ + (void) argv; /* not used */ + ctx->halt = 1; +} + +static void instr_sub(tmpasm_t *ctx, char *iname, 
int argc, tmpasm_arg_t *argv[]) +{ + char *node, *pat, *err, *csub, *buff, *end; + const char *start; + const char *val; + int score, slen, global; + + if (argc < 3) { + tmpasm_runtime_error(ctx, -2, NULL); + return; + } + + node = tmpasm_arg2str(ctx, argv[0], 1); + pat = tmpasm_arg2str(ctx, argv[1], 0); + csub = tmpasm_arg2str(ctx, argv[2], 0); + global = (*iname == 'g'); + + val = get(node); + if (val == NULL) + val=""; + err = re_comp(pat); + if (err != NULL) { + tmpasm_runtime_error(ctx, -3, err); + return; + } + + slen = strlen(csub); + if (global) + buff = malloc(strlen(val)*(slen+3)+32); /* big enough for worst case, when every letter and $ and ^ are replaced with sub */ + else + buff = malloc(strlen(val)+slen+32); /* only one replacement will be done */ + strcpy(buff, val); + + start = buff; + do { + score = re_exec(start); + if (score == 0) + break; + end = buff + strlen(buff); + if (eopat[0] - bopat[0] != slen) { + int mlen = end - eopat[0]+1; + if (mlen > 0) + memmove((char *)(bopat[0] + slen), eopat[0], mlen); + } + memcpy((char *)bopat[0], csub, slen); + start = bopat[0] + slen; + } while(global); + + buff = realloc(buff, strlen(buff)+1); + put(node, buff); + free(buff); + free(node); + free(pat); + free(csub); +} + +#define UNIQ_ERE_MAX 16 +static char *uniq_eres[UNIQ_ERE_MAX]; + +static void instr_uniq(tmpasm_t *ctx, char *iname, int argc, tmpasm_arg_t *argv[]) +{ + char *node, *strlist, *buff; + int eres = 0; + + if (argc < 1) { + tmpasm_runtime_error(ctx, -4, NULL); + return; + } + node = tmpasm_arg2str(ctx, argv[0], 1); + if (argc > 1) { + int offs = 2; + + strlist = tmpasm_arg2str(ctx, argv[1], 0); + if ((argc-offs) >= UNIQ_ERE_MAX) { + tmpasm_runtime_error(ctx, -11, NULL); + return; + } + while(argc > offs) + uniq_eres[eres++] = tmpasm_arg2str(ctx, argv[offs++], 0); + if (eres > 0) + uniq_eres[eres++] = ".*"; + } + else + strlist = strclone(get(node)); + buff = uniq_inc_str(strlist, scc_ifs(ctx), scc_ofs(ctx), (*iname == 's'), eres, uniq_eres); 
+ put(node, buff); + free(buff); + free(strlist); + free(node); +} + +static void instr_order(tmpasm_t *ctx, char *iname, int argc, tmpasm_arg_t *argv[]) +{ + char *node, *strlist, *buff, *w1, *dirs, *w2; + int offs, dir; + + if ((argc != 4) && (argc != 5)) { + tmpasm_runtime_error(ctx, -10, NULL); + return; + } + node = tmpasm_arg2str(ctx, argv[0], 1); + if (argc > 4) { + strlist = tmpasm_arg2str(ctx, argv[1], 0); + offs = 2; + } + else { + strlist = strclone(get(node)); + offs = 1; + } + + w1 = tmpasm_arg2str(ctx, argv[offs], 0); + dirs = tmpasm_arg2str(ctx, argv[offs+1], 0); + w2 = tmpasm_arg2str(ctx, argv[offs+2], 0); + + if (strcmp(dirs, "before") == 0) + dir = -1; + else if (strcmp(dirs, "after") == 0) + dir = +1; + else { + tmpasm_runtime_error(ctx, -10, NULL); + return; + } + + buff = order_inc_str(strlist, scc_ifs(ctx), w1, dir, w2); + put(node, buff); + free(buff); + free(strlist); + free(node); +} + +static void instr_print(tmpasm_t *ctx, char *iname, int argc, tmpasm_arg_t *argv[]) +{ + int n; + scc_t *ud = (scc_t *)ctx->user_data; + (void) iname; /* not used */ + + for(n = 0; n < argc; n++) { + char *val; + val = tmpasm_arg2str(ctx, argv[n], 0); + fprintf(ud->fout, "%s", val); + free(val); + } +} + +static void instr_print_ternary(tmpasm_t *ctx, char *iname, int argc, tmpasm_arg_t *argv[]) +{ + char *s_cond, *s; + scc_t *ud = (scc_t *)ctx->user_data; + (void) iname; /* not used */ + + if ((argc < 2) || (argc > 3)) { + char str[16]; + sprintf(str, "%d", argc); + tmpasm_runtime_error(ctx, -1, str); + return; + } + + s_cond = tmpasm_arg2str(ctx, argv[0], 0); + + if (ctx->cb->is_true(ctx, s_cond)) + s = tmpasm_arg2str(ctx, argv[1], 0); + else + s = tmpasm_arg2str(ctx, argv[2], 0); + + fprintf(ud->fout, "%s", s); + + free(s_cond); + free(s); +} + +static void scc_tmpasm_parse_(tmpasm_t *ctx, const char *cwd, FILE *fin, FILE *default_fout, FILE *fout) +{ + scc_t *ud = malloc(sizeof(scc_t)); + memset(&ud->ofl, 0, sizeof(ud->ofl)); + ctx->user_data = ud; + 
ud->default_fout = default_fout; + ud->fout = fout; + ud->cwd = cwd; + + for(;;) { + int c; + c = fgetc(fin); + if (c == EOF) + break; + tmpasm_gotchar(ctx, c); + } + +} + +void scc_tmpasm_parse(tmpasm_t *ctx, const char *cwd, FILE *fin, FILE *fout) +{ + scc_tmpasm_parse_(ctx, cwd, fin, fout, fout); +} + +#ifndef NO_FILE_IO +static void instr_include(tmpasm_t *ctx, char *iname, int argc, tmpasm_arg_t *argv[]) +{ + scc_t *ud = (scc_t *)ctx->user_data; + int n; + (void) iname; /* not used */ + + for(n = 0; n < argc; n++) { + char *fn, *path; + FILE *fin; + tmpasm_t *child; + + fn = tmpasm_arg2str(ctx, argv[n], 0); + path = scc_path(ud, fn); + fin = fopen(path, "r"); + if (fin == NULL) { + tmpasm_runtime_error(ctx, -8, path); + free(fn); + free(path); + return; + } + child = tmpasm_init(ctx->cb); + scc_tmpasm_parse_(child, ud->cwd, fin, ud->default_fout, ud->fout); + tmpasm_execute(child); + if (print_runtime_error(child, path) != 0) + tmpasm_runtime_error(ctx, -7, path); + tmpasm_uninit(child); + fclose(fin); + free(fn); + free(path); + } +} + + +static void instr_redir(tmpasm_t *ctx, char *iname, int argc, tmpasm_arg_t *argv[]) +{ + char *path, *fn, *mode; + scc_t *ud = (scc_t *)ctx->user_data; + (void) iname; /* not used */ + fflush(ud->fout); + switch(argc) { + case 0: ud->fout = ud->default_fout; return; /* set redirection to default */ + case 1: mode = strclone("w"); break; + case 2: mode = tmpasm_arg2str(ctx, argv[1], 0); break; + default: + tmpasm_runtime_error(ctx, -5, NULL); + return; + } + + fn = tmpasm_arg2str(ctx, argv[0], 0); + path = scc_path(ud, fn); + ud->fout = openfile_open(&ud->ofl, path, mode); + if (ud->fout == NULL) { + char *err = malloc(strlen(fn) + strlen(path) + strlen(mode) + 16); + sprintf(err, "%s (%s) for %s", path, fn, mode); + tmpasm_runtime_error(ctx, -6, err); + free(err); + free(path); + return; + } + free(fn); + free(mode); + free(path); +} +#endif + +#ifdef TMPASM_TESTER +static void instr_unknown(tmpasm_t *ctx, char *iname, int 
argc, tmpasm_arg_t *argv[]) +{ + printf("ERROR: unknown instruction '%s'\n", iname); +} +#endif + + +/******** interface ********/ + +tmpasm_instr *scc_resolve(tmpasm_t *ctx, const char *name) +{ + (void) ctx; /* not used */ +/* TODO: make this a hash */ + if (strcmp(name, "put") == 0) + return instr_put; + if (strcmp(name, "append") == 0) + return instr_append; + if (strcmp(name, "print") == 0) + return instr_print; + if (strcmp(name, "print_ternary") == 0) + return instr_print_ternary; +#ifndef TMPASM_NO_FILE_IO + if (strcmp(name, "redir") == 0) + return instr_redir; + if (strcmp(name, "include") == 0) + return instr_include; +#endif + if (strcmp(name, "report") == 0) + return instr_report; + if (strcmp(name, "abort") == 0) + return instr_abort; + if (strcmp(name, "halt") == 0) + return instr_halt; + if (strcmp(name, "uniq") == 0) + return instr_uniq; + if (strcmp(name, "order") == 0) + return instr_order; + if (strcmp(name, "sortuniq") == 0) + return instr_uniq; + if ((strcmp(name, "sub") == 0) || (strcmp(name, "gsub") == 0)) + return instr_sub; + +#ifndef TMPASM_TESTER + return NULL; +#else + return instr_unknown; +#endif +} + + +static const char *scc_err_fmt(tmpasm_t *ctx) +{ + int code; + code = -ctx->runtime_error; + + if ((code < 0) || ((size_t)code > (sizeof(scc_runtime_error_fmts)/sizeof(char *)))) + return NULL; + return scc_runtime_error_fmts[code]; +} + + +static void scc_preexec(tmpasm_t *ctx) +{ + (void) ctx; /* not used */ + db_mkdir(TMPASM_PATH); +} + +static void scc_postexec(tmpasm_t *ctx) +{ + scc_t *ud = (scc_t *)ctx->user_data; + openfile_free(&ud->ofl); + free(ud); +} + +tmpasm_cb_t scc_cb = { + scc_get, scc_set, scc_is_true, scc_match, scc_first, scc_next, + scc_resolve, scc_preexec, scc_postexec, scc_err_fmt +}; + +int tmpasm(const char *wdir, const char *input, const char *output) +{ + tmpasm_t *ctx; + FILE *fin, *fout; + int ret; + scc_t ud_tmp; + char *path; + + ud_tmp.cwd = wdir; + + path = scc_path(&ud_tmp, input); + fin = fopen(path, 
"r"); + if (fin == NULL) { + fprintf(stderr, "ERROR: tmpasm: can not open script '%s' (%s in %s)\n", path, input, wdir); + free(path); + return -1; + } + free(path); + + path = scc_path(&ud_tmp, output); + fout = fopen(path, "w"); + if (fout == NULL) { + fprintf(stderr, "ERROR: tmpasm: can not open output '%s' (%s in %s)\n", path, output, wdir); + free(path); + return -1; + } + free(path); + + ctx = tmpasm_init(&scc_cb); + scc_tmpasm_parse_(ctx, wdir, fin, fout, fout); + if (!ctx->dead) + tmpasm_execute(ctx); + fclose(fin); + fclose(fout); + + ret = print_runtime_error(ctx, input); + + tmpasm_uninit(ctx); + return ret; +} + +FILE *tmpasm_fout(tmpasm_t *ctx) +{ + scc_t *ud = (scc_t *)ctx->user_data; + return ud->fout; +} diff --git a/scconfig/src/tmpasm/tmpasm_scconfig.h b/scconfig/src/tmpasm/tmpasm_scconfig.h new file mode 100644 index 0000000..1ed50f7 --- /dev/null +++ b/scconfig/src/tmpasm/tmpasm_scconfig.h @@ -0,0 +1,14 @@ +#include +#include "tmpasm.h" + +int tmpasm(const char *wdir, const char *input, const char *output); + +void scc_tmpasm_parse(tmpasm_t *ctx, const char *cwd, FILE *fin, FILE *fout); + +FILE *tmpasm_fout(tmpasm_t *ctx); + +tmpasm_instr *scc_resolve(tmpasm_t *ctx, const char *name); + +extern tmpasm_cb_t scc_cb; + + diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..4f0ceaa --- /dev/null +++ b/src/Makefile @@ -0,0 +1,28 @@ +all: + cd libmawk && $(MAKE) +# cd example_apps && $(MAKE) all + +test: + cd libmawk && $(MAKE) test + +install: + cd libmawk && $(MAKE) install +# cd example_apps && $(MAKE) install + +uninstall: + cd libmawk && $(MAKE) uninstall +# cd example_apps && $(MAKE) uninstall + +linstall: + cd libmawk && $(MAKE) linstall +# cd example_apps && $(MAKE) linstall + +clean: +# cd example_apps && $(MAKE) clean + cd awklib && $(MAKE) clean + cd libmawk && $(MAKE) clean + +distclean: +# cd example_apps && $(MAKE) distclean + cd awklib && $(MAKE) distclean + cd libmawk && $(MAKE) distclean diff --git 
a/src/awklib/Makefile.in b/src/awklib/Makefile.in new file mode 100644 index 0000000..7b36b23 --- /dev/null +++ b/src/awklib/Makefile.in @@ -0,0 +1,45 @@ +put /tmpasm/IFS { } +put /local/libs {lib_array lib_ascii lib_rand} + +print [@ +# Generated by scconfig from Makefile.in +LIBPATH=$(install_root)/usr/lib/libmawk + +all: + +install: + @fstools/mkdir@ $(LIBPATH) +@] +foreach /local/n in /local/libs + print [@ @fstools/cp@ @/local/n@.awk $(LIBPATH)/@/local/n@.awk +@] +end + +print [@ +linstall: + @fstools/mkdir@ $(LIBPATH) +@] +foreach /local/n in /local/libs + print [@ @fstools/ln@ @/local/n@.awk $(LIBPATH)/@/local/n@.awk +@] +end + +print [@ +uninstall: +@] +foreach /local/n in /local/libs + print [@ @fstools/rm@ $(LIBPATH)/@/local/n@.awk +@] +end + +print [@ +test: + cd regression && make + +clean: + +distclean: clean + -rm Makefile regression/Makefile +@] + +put /tmpasm/IFS {} diff --git a/src/awklib/lib_array.awk b/src/awklib/lib_array.awk new file mode 100644 index 0000000..61d5ce0 --- /dev/null +++ b/src/awklib/lib_array.awk @@ -0,0 +1,77 @@ +function lib_array_version() +{ + return 1 +} + +BEGIN { +# item open: ASCII "start of text" + LIB_ARRAY_SOPEN = "\002" +# item close: ASCII "end of text" + LIB_ARRAY_SCLOSE = "\003" +# index/value separator: ASCII "unit separator" + LIB_ARRAY_SSEP = "\036" +} + +# print an array; each element in a new line prefixed by prefix +function lib_array_print(prefix, A ,n) +{ + for(n in A) + print prefix "[" n "] = '" A[n] "'" +} + +# Pack an array to a single string. Optionally use Sopen/Sclose/Ssep for +# separator characters. Each element of the array is packed in the following +# sequence: Sopen Ssep Sclose; index can not contain Ssep, +# but value may contain a full packed array using the same separators. +# If Sopen/Sclose/Ssep are empty, default separators are used +# (LIB_ARRAY_SOPEN/LIB_ARRAY_SCLOSE/LIB_ARRAY_SSEP). 
+function lib_array_pack(A, Sopen, Sclose, Ssep, n,s) +{ + s="" + if (Sopen == "") Sopen = LIB_ARRAY_SOPEN + if (Sclose == "") Sclose = LIB_ARRAY_SCLOSE + if (Ssep == "") Ssep = LIB_ARRAY_SSEP + + for(n in A) + s = s Sopen n Ssep A[n] Sclose + + return s +} + +# Unpack string to an array. Separator rules are the same as +# for lib_array_pack(). Returns number of indices found (>=0) or +# -1 on syntax error +function lib_array_unpack(ARRAY, str, Sopen, Sclose, Ssep, n,len,lvl,c,idx,val,start,idxs) +{ + if (Sopen == "") Sopen = LIB_ARRAY_SOPEN + if (Sclose == "") Sclose = LIB_ARRAY_SCLOSE + if (Ssep == "") Ssep = LIB_ARRAY_SSEP + + len = length(str); + lvl = 0 + idxs = 0 + for(n = 1; n <= len; n++) { + c = substr(str, n, 1); + if (c == Sopen) { + lvl++; + if (lvl == 1) { + n++ + start = n; + while((substr(str, n, 1) != Ssep) && (n <= len)) n++; + idx = substr(str, start, n-start) + start = n+1; + } + } + else if (c == Sclose) { + lvl-- + if (lvl == 0) { + ARRAY[idx] = substr(str, start, n-start) + idxs++ + } + } + } + if (lvl != 0) + return -1; + return idxs +} + diff --git a/src/awklib/lib_ascii.awk b/src/awklib/lib_ascii.awk new file mode 100644 index 0000000..0ee4bf6 --- /dev/null +++ b/src/awklib/lib_ascii.awk @@ -0,0 +1,288 @@ +function lib_ascii_version() +{ + return 1 +} + +# convert an binary character to ascii code (integer) +function lib_ascii_ord(chr) +{ + return lib_ascii_ASCII[chr] +} + +# ---- initializing the ASCII table ---- +# Slowest variant: 10k runs at 2.241 seconds on a machine +# for(n = 0; n < 256; n = n+1) +# lib_ascii_ASCII[sprintf("%c", n)] = n +# +# a bit faster: 10k runs at 1.604 on the same machine +# split("\001 \002 \003 \004 \005 \006 \007 \010 \011 \012 \013 \014 \015 \016 \017 \020 \021 \022 \023 \024 \025 \026 \027 \030 \031 \032 \033 \034 \035 \036 \037 ! 
\041 \042 \043 \044 \045 \046 \047 \050 \051 \052 \053 \054 \055 \056 \057 \060 \061 \062 \063 \064 \065 \066 \067 \070 \071 \072 \073 \074 \075 \076 \077 \100 \101 \102 \103 \104 \105 \106 \107 \110 \111 \112 \113 \114 \115 \116 \117 \120 \121 \122 \123 \124 \125 \126 \127 \130 \131 \132 \133 \134 \135 \136 \137 \140 \141 \142 \143 \144 \145 \146 \147 \150 \151 \152 \153 \154 \155 \156 \157 \160 \161 \162 \163 \164 \165 \166 \167 \170 \171 \172 \173 \174 \175 \176 \177 ", A, " "); +# for(n = 1; n < 128; n++) { +# lib_ascii_ASCII[A[n]] = n; +# } +# split("\200 \201 \202 \203 \204 \205 \206 \207 \210 \211 \212 \213 \214 \215 \216 \217 \220 \221 \222 \223 \224 \225 \226 \227 \230 \231 \232 \233 \234 \235 \236 \237 \240 \241 \242 \243 \244 \245 \246 \247 \250 \251 \252 \253 \254 \255 \256 \257 \260 \261 \262 \263 \264 \265 \266 \267 \270 \271 \272 \273 \274 \275 \276 \277 \300 \301 \302 \303 \304 \305 \306 \307 \310 \311 \312 \313 \314 \315 \316 \317 \320 \321 \322 \323 \324 \325 \326 \327 \330 \331 \332 \333 \334 \335 \336 \337 \340 \341 \342 \343 \344 \345 \346 \347 \350 \351 \352 \353 \354 \355 \356 \357 \360 \361 \362 \363 \364 \365 \366 \367 \370 \371 \372 \373 \374 \375 \376 \377", A, " "); +# for(n = 1; n < 128; n++) { +# lib_ascii_ASCII[A[n]] = n+128; +# } +# lib_ascii_ASCII["\000"] = 0 +# lib_ascii_ASCII[" "] = 32 + +# Fastest version on the same machine: 0.5 seconds +BEGIN { + lib_ascii_ASCII["\000"] = 0 + lib_ascii_ASCII["\001"] = 1 + lib_ascii_ASCII["\002"] = 2 + lib_ascii_ASCII["\003"] = 3 + lib_ascii_ASCII["\004"] = 4 + lib_ascii_ASCII["\005"] = 5 + lib_ascii_ASCII["\006"] = 6 + lib_ascii_ASCII["\007"] = 7 + lib_ascii_ASCII["\010"] = 8 + lib_ascii_ASCII["\011"] = 9 + lib_ascii_ASCII["\012"] = 10 + lib_ascii_ASCII["\013"] = 11 + lib_ascii_ASCII["\014"] = 12 + lib_ascii_ASCII["\015"] = 13 + lib_ascii_ASCII["\016"] = 14 + lib_ascii_ASCII["\017"] = 15 + lib_ascii_ASCII["\020"] = 16 + lib_ascii_ASCII["\021"] = 17 + lib_ascii_ASCII["\022"] = 18 + 
lib_ascii_ASCII["\023"] = 19 + lib_ascii_ASCII["\024"] = 20 + lib_ascii_ASCII["\025"] = 21 + lib_ascii_ASCII["\026"] = 22 + lib_ascii_ASCII["\027"] = 23 + lib_ascii_ASCII["\030"] = 24 + lib_ascii_ASCII["\031"] = 25 + lib_ascii_ASCII["\032"] = 26 + lib_ascii_ASCII["\033"] = 27 + lib_ascii_ASCII["\034"] = 28 + lib_ascii_ASCII["\035"] = 29 + lib_ascii_ASCII["\036"] = 30 + lib_ascii_ASCII["\037"] = 31 + lib_ascii_ASCII["\040"] = 32 + lib_ascii_ASCII["\041"] = 33 + lib_ascii_ASCII["\042"] = 34 + lib_ascii_ASCII["\043"] = 35 + lib_ascii_ASCII["\044"] = 36 + lib_ascii_ASCII["\045"] = 37 + lib_ascii_ASCII["\046"] = 38 + lib_ascii_ASCII["\047"] = 39 + lib_ascii_ASCII["\050"] = 40 + lib_ascii_ASCII["\051"] = 41 + lib_ascii_ASCII["\052"] = 42 + lib_ascii_ASCII["\053"] = 43 + lib_ascii_ASCII["\054"] = 44 + lib_ascii_ASCII["\055"] = 45 + lib_ascii_ASCII["\056"] = 46 + lib_ascii_ASCII["\057"] = 47 + lib_ascii_ASCII["\060"] = 48 + lib_ascii_ASCII["\061"] = 49 + lib_ascii_ASCII["\062"] = 50 + lib_ascii_ASCII["\063"] = 51 + lib_ascii_ASCII["\064"] = 52 + lib_ascii_ASCII["\065"] = 53 + lib_ascii_ASCII["\066"] = 54 + lib_ascii_ASCII["\067"] = 55 + lib_ascii_ASCII["\070"] = 56 + lib_ascii_ASCII["\071"] = 57 + lib_ascii_ASCII["\072"] = 58 + lib_ascii_ASCII["\073"] = 59 + lib_ascii_ASCII["\074"] = 60 + lib_ascii_ASCII["\075"] = 61 + lib_ascii_ASCII["\076"] = 62 + lib_ascii_ASCII["\077"] = 63 + lib_ascii_ASCII["\100"] = 64 + lib_ascii_ASCII["\101"] = 65 + lib_ascii_ASCII["\102"] = 66 + lib_ascii_ASCII["\103"] = 67 + lib_ascii_ASCII["\104"] = 68 + lib_ascii_ASCII["\105"] = 69 + lib_ascii_ASCII["\106"] = 70 + lib_ascii_ASCII["\107"] = 71 + lib_ascii_ASCII["\110"] = 72 + lib_ascii_ASCII["\111"] = 73 + lib_ascii_ASCII["\112"] = 74 + lib_ascii_ASCII["\113"] = 75 + lib_ascii_ASCII["\114"] = 76 + lib_ascii_ASCII["\115"] = 77 + lib_ascii_ASCII["\116"] = 78 + lib_ascii_ASCII["\117"] = 79 + lib_ascii_ASCII["\120"] = 80 + lib_ascii_ASCII["\121"] = 81 + lib_ascii_ASCII["\122"] = 82 + 
lib_ascii_ASCII["\123"] = 83 + lib_ascii_ASCII["\124"] = 84 + lib_ascii_ASCII["\125"] = 85 + lib_ascii_ASCII["\126"] = 86 + lib_ascii_ASCII["\127"] = 87 + lib_ascii_ASCII["\130"] = 88 + lib_ascii_ASCII["\131"] = 89 + lib_ascii_ASCII["\132"] = 90 + lib_ascii_ASCII["\133"] = 91 + lib_ascii_ASCII["\134"] = 92 + lib_ascii_ASCII["\135"] = 93 + lib_ascii_ASCII["\136"] = 94 + lib_ascii_ASCII["\137"] = 95 + lib_ascii_ASCII["\140"] = 96 + lib_ascii_ASCII["\141"] = 97 + lib_ascii_ASCII["\142"] = 98 + lib_ascii_ASCII["\143"] = 99 + lib_ascii_ASCII["\144"] = 100 + lib_ascii_ASCII["\145"] = 101 + lib_ascii_ASCII["\146"] = 102 + lib_ascii_ASCII["\147"] = 103 + lib_ascii_ASCII["\150"] = 104 + lib_ascii_ASCII["\151"] = 105 + lib_ascii_ASCII["\152"] = 106 + lib_ascii_ASCII["\153"] = 107 + lib_ascii_ASCII["\154"] = 108 + lib_ascii_ASCII["\155"] = 109 + lib_ascii_ASCII["\156"] = 110 + lib_ascii_ASCII["\157"] = 111 + lib_ascii_ASCII["\160"] = 112 + lib_ascii_ASCII["\161"] = 113 + lib_ascii_ASCII["\162"] = 114 + lib_ascii_ASCII["\163"] = 115 + lib_ascii_ASCII["\164"] = 116 + lib_ascii_ASCII["\165"] = 117 + lib_ascii_ASCII["\166"] = 118 + lib_ascii_ASCII["\167"] = 119 + lib_ascii_ASCII["\170"] = 120 + lib_ascii_ASCII["\171"] = 121 + lib_ascii_ASCII["\172"] = 122 + lib_ascii_ASCII["\173"] = 123 + lib_ascii_ASCII["\174"] = 124 + lib_ascii_ASCII["\175"] = 125 + lib_ascii_ASCII["\176"] = 126 + lib_ascii_ASCII["\177"] = 127 + lib_ascii_ASCII["\200"] = 128 + lib_ascii_ASCII["\201"] = 129 + lib_ascii_ASCII["\202"] = 130 + lib_ascii_ASCII["\203"] = 131 + lib_ascii_ASCII["\204"] = 132 + lib_ascii_ASCII["\205"] = 133 + lib_ascii_ASCII["\206"] = 134 + lib_ascii_ASCII["\207"] = 135 + lib_ascii_ASCII["\210"] = 136 + lib_ascii_ASCII["\211"] = 137 + lib_ascii_ASCII["\212"] = 138 + lib_ascii_ASCII["\213"] = 139 + lib_ascii_ASCII["\214"] = 140 + lib_ascii_ASCII["\215"] = 141 + lib_ascii_ASCII["\216"] = 142 + lib_ascii_ASCII["\217"] = 143 + lib_ascii_ASCII["\220"] = 144 + lib_ascii_ASCII["\221"] = 145 + 
lib_ascii_ASCII["\222"] = 146 + lib_ascii_ASCII["\223"] = 147 + lib_ascii_ASCII["\224"] = 148 + lib_ascii_ASCII["\225"] = 149 + lib_ascii_ASCII["\226"] = 150 + lib_ascii_ASCII["\227"] = 151 + lib_ascii_ASCII["\230"] = 152 + lib_ascii_ASCII["\231"] = 153 + lib_ascii_ASCII["\232"] = 154 + lib_ascii_ASCII["\233"] = 155 + lib_ascii_ASCII["\234"] = 156 + lib_ascii_ASCII["\235"] = 157 + lib_ascii_ASCII["\236"] = 158 + lib_ascii_ASCII["\237"] = 159 + lib_ascii_ASCII["\240"] = 160 + lib_ascii_ASCII["\241"] = 161 + lib_ascii_ASCII["\242"] = 162 + lib_ascii_ASCII["\243"] = 163 + lib_ascii_ASCII["\244"] = 164 + lib_ascii_ASCII["\245"] = 165 + lib_ascii_ASCII["\246"] = 166 + lib_ascii_ASCII["\247"] = 167 + lib_ascii_ASCII["\250"] = 168 + lib_ascii_ASCII["\251"] = 169 + lib_ascii_ASCII["\252"] = 170 + lib_ascii_ASCII["\253"] = 171 + lib_ascii_ASCII["\254"] = 172 + lib_ascii_ASCII["\255"] = 173 + lib_ascii_ASCII["\256"] = 174 + lib_ascii_ASCII["\257"] = 175 + lib_ascii_ASCII["\260"] = 176 + lib_ascii_ASCII["\261"] = 177 + lib_ascii_ASCII["\262"] = 178 + lib_ascii_ASCII["\263"] = 179 + lib_ascii_ASCII["\264"] = 180 + lib_ascii_ASCII["\265"] = 181 + lib_ascii_ASCII["\266"] = 182 + lib_ascii_ASCII["\267"] = 183 + lib_ascii_ASCII["\270"] = 184 + lib_ascii_ASCII["\271"] = 185 + lib_ascii_ASCII["\272"] = 186 + lib_ascii_ASCII["\273"] = 187 + lib_ascii_ASCII["\274"] = 188 + lib_ascii_ASCII["\275"] = 189 + lib_ascii_ASCII["\276"] = 190 + lib_ascii_ASCII["\277"] = 191 + lib_ascii_ASCII["\300"] = 192 + lib_ascii_ASCII["\301"] = 193 + lib_ascii_ASCII["\302"] = 194 + lib_ascii_ASCII["\303"] = 195 + lib_ascii_ASCII["\304"] = 196 + lib_ascii_ASCII["\305"] = 197 + lib_ascii_ASCII["\306"] = 198 + lib_ascii_ASCII["\307"] = 199 + lib_ascii_ASCII["\310"] = 200 + lib_ascii_ASCII["\311"] = 201 + lib_ascii_ASCII["\312"] = 202 + lib_ascii_ASCII["\313"] = 203 + lib_ascii_ASCII["\314"] = 204 + lib_ascii_ASCII["\315"] = 205 + lib_ascii_ASCII["\316"] = 206 + lib_ascii_ASCII["\317"] = 207 + 
lib_ascii_ASCII["\320"] = 208 + lib_ascii_ASCII["\321"] = 209 + lib_ascii_ASCII["\322"] = 210 + lib_ascii_ASCII["\323"] = 211 + lib_ascii_ASCII["\324"] = 212 + lib_ascii_ASCII["\325"] = 213 + lib_ascii_ASCII["\326"] = 214 + lib_ascii_ASCII["\327"] = 215 + lib_ascii_ASCII["\330"] = 216 + lib_ascii_ASCII["\331"] = 217 + lib_ascii_ASCII["\332"] = 218 + lib_ascii_ASCII["\333"] = 219 + lib_ascii_ASCII["\334"] = 220 + lib_ascii_ASCII["\335"] = 221 + lib_ascii_ASCII["\336"] = 222 + lib_ascii_ASCII["\337"] = 223 + lib_ascii_ASCII["\340"] = 224 + lib_ascii_ASCII["\341"] = 225 + lib_ascii_ASCII["\342"] = 226 + lib_ascii_ASCII["\343"] = 227 + lib_ascii_ASCII["\344"] = 228 + lib_ascii_ASCII["\345"] = 229 + lib_ascii_ASCII["\346"] = 230 + lib_ascii_ASCII["\347"] = 231 + lib_ascii_ASCII["\350"] = 232 + lib_ascii_ASCII["\351"] = 233 + lib_ascii_ASCII["\352"] = 234 + lib_ascii_ASCII["\353"] = 235 + lib_ascii_ASCII["\354"] = 236 + lib_ascii_ASCII["\355"] = 237 + lib_ascii_ASCII["\356"] = 238 + lib_ascii_ASCII["\357"] = 239 + lib_ascii_ASCII["\360"] = 240 + lib_ascii_ASCII["\361"] = 241 + lib_ascii_ASCII["\362"] = 242 + lib_ascii_ASCII["\363"] = 243 + lib_ascii_ASCII["\364"] = 244 + lib_ascii_ASCII["\365"] = 245 + lib_ascii_ASCII["\366"] = 246 + lib_ascii_ASCII["\367"] = 247 + lib_ascii_ASCII["\370"] = 248 + lib_ascii_ASCII["\371"] = 249 + lib_ascii_ASCII["\372"] = 250 + lib_ascii_ASCII["\373"] = 251 + lib_ascii_ASCII["\374"] = 252 + lib_ascii_ASCII["\375"] = 253 + lib_ascii_ASCII["\376"] = 254 + lib_ascii_ASCII["\377"] = 255 +} + diff --git a/src/awklib/lib_hex.awk b/src/awklib/lib_hex.awk new file mode 100644 index 0000000..98866d9 --- /dev/null +++ b/src/awklib/lib_hex.awk @@ -0,0 +1,50 @@ +function lib_hex_version() +{ + return 1 +} + +BEGIN { + LIB_HEX["0"] = 0 + LIB_HEX["1"] = 1 + LIB_HEX["2"] = 2 + LIB_HEX["3"] = 3 + LIB_HEX["4"] = 4 + LIB_HEX["5"] = 5 + LIB_HEX["6"] = 6 + LIB_HEX["7"] = 7 + LIB_HEX["8"] = 8 + LIB_HEX["9"] = 9 + LIB_HEX["A"] = 10 + LIB_HEX["a"] = 10 + 
LIB_HEX["B"] = 11 + LIB_HEX["b"] = 11 + LIB_HEX["C"] = 12 + LIB_HEX["c"] = 12 + LIB_HEX["D"] = 13 + LIB_HEX["d"] = 13 + LIB_HEX["E"] = 14 + LIB_HEX["e"] = 14 + LIB_HEX["F"] = 15 + LIB_HEX["f"] = 15 +} + +# converts hex number in string to integer; returns "" on error +function lib_hex_str2int(s ,v,n,l,c) +{ + sub("^0x", "", s) + v = 0; + l = length(s) + for(n = 1; n <= length(s); n++) { + c = substr(s, n, 1) + if (!(c in LIB_HEX)) + return "" + v = v * 16 + LIB_HEX[c]; + } + return v +} + +function lib_hex_int2str(i) +{ + return sprintf("0x%x", i) +} + diff --git a/src/awklib/lib_rand.awk b/src/awklib/lib_rand.awk new file mode 100644 index 0000000..f5b3d5f --- /dev/null +++ b/src/awklib/lib_rand.awk @@ -0,0 +1,29 @@ +function lib_rand_version() +{ + return 1 +} +# core code by nsz + + +# STATE is an array that stores all states of a random queue +# random queues generate pseudo random numbers independently of other queues + +# set random seed on STATE +# if seed is not provided, a random number us choosen (use srand() before that) +function lib_rand_srand(STATE, seed) { + if (seed == "") + seed = rand() + STATE[lib_rand_state] = lib_rand_mod_2_26(int(seed)) +} + +# get next random number from queue STATE +function lib_rand_rand(STATE) { + STATE[lib_rand_state] = lib_rand_mod_2_26(54514969*STATE[lib_rand_state] + 21095981) + return STATE[lib_rand_state]/67108864 +} + +# internal +function lib_rand_mod_2_26(n) { + n -= int(n/67108864)*67108864 + return n +} diff --git a/src/awklib/regression/Makefile.in b/src/awklib/regression/Makefile.in new file mode 100644 index 0000000..cc55ce5 --- /dev/null +++ b/src/awklib/regression/Makefile.in @@ -0,0 +1,33 @@ +put /local/tests {array rand} +put /local/tester {../../libmawk/lmawk} + +print [@ +# Generated by scconfig from Makefile.in + +all:@] + +foreach /local/n in /local/tests + print [@@/local/n@.diff@] +end + +print {\n @echo "QC passed."} + +foreach /local/n in /local/tests +print [^ +^/local/n^.out: ^/local/n^.awk 
^/local/tester^ + @^/local/tester^ -f ^/local/n^.awk > ^/local/n^.out + +^/local/n^.diff: ^/local/n^.OUT ^/local/n^.out + @diff ^/local/n^.OUT ^/local/n^.out +^] +end + +print [@ +clean: + rm @] + +foreach /local/n in /local/tests + print [@ @/local/n@.diff @/local/n@.out@] +end + +print { 2>/dev/null; true\n\n} diff --git a/src/awklib/regression/array.OUT b/src/awklib/regression/array.OUT new file mode 100644 index 0000000..3876c95 --- /dev/null +++ b/src/awklib/regression/array.OUT @@ -0,0 +1,28 @@ +Array: + A[1] = 'one' + A[2] = 'two' + A[3] = 'three' + A[4] = 'four' + A[5] = 'five' + A[6] = 'six' + A[7] = 'seven' + A[8] = 'eight' +Pack/Unpack A: + B[10] = '1one2two3three4four5five6six7seven8eight' + B[1] = 'one' + B[2] = 'two' + B[3] = 'three' + B[4] = 'four' + B[5] = 'five' + B[6] = 'six' + B[7] = 'seven' + B[8] = 'eight' +Pack/Unpack A[10]: + C[1] = 'one' + C[2] = 'two' + C[3] = 'three' + C[4] = 'four' + C[5] = 'five' + C[6] = 'six' + C[7] = 'seven' + C[8] = 'eight' diff --git a/src/awklib/regression/array.awk b/src/awklib/regression/array.awk new file mode 100644 index 0000000..59049af --- /dev/null +++ b/src/awklib/regression/array.awk @@ -0,0 +1,22 @@ +include "../lib_array.awk" + +BEGIN { + v = split("one two three four five six seven eight", A, " "); + print "Array:" + lib_array_print(" A", A); + + str = lib_array_pack(A); + A[10] = str + + str = lib_array_pack(A); + + lib_array_unpack(B, str); + + print "Pack/Unpack A:" + lib_array_print(" B", B); + + + print "Pack/Unpack A[10]:" + lib_array_unpack(C, A[10]); + lib_array_print(" C", C); +} diff --git a/src/awklib/regression/hex.OUT b/src/awklib/regression/hex.OUT new file mode 100644 index 0000000..7e13469 --- /dev/null +++ b/src/awklib/regression/hex.OUT @@ -0,0 +1,4 @@ +591751040 +430689775 +43690 +43690 diff --git a/src/awklib/regression/hex.awk b/src/awklib/regression/hex.awk new file mode 100644 index 0000000..d7c74a8 --- /dev/null +++ b/src/awklib/regression/hex.awk @@ -0,0 +1,7 @@ +include 
"../lib_hex.awk" +BEGIN { + print lib_hex_str2int("23456780") + print lib_hex_str2int("19aBcDeF") + print lib_hex_str2int("0xaaaa") + print lib_hex_str2int("aaaa") +} diff --git a/src/awklib/regression/rand.OUT b/src/awklib/regression/rand.OUT new file mode 100644 index 0000000..8a9bdca --- /dev/null +++ b/src/awklib/regression/rand.OUT @@ -0,0 +1,3 @@ +q1 sum: 4.53303 +q1 sum: 4.53303 +q2 sum: 5.8163 diff --git a/src/awklib/regression/rand.awk b/src/awklib/regression/rand.awk new file mode 100644 index 0000000..7e0a430 --- /dev/null +++ b/src/awklib/regression/rand.awk @@ -0,0 +1,19 @@ +include "../lib_rand.awk" +BEGIN { + lib_rand_srand(Q1, 1234567) + for (i=1; i<=10; i++) + q1 += lib_rand_rand(Q1) + + print "q1 sum: " q1 + + q1=0 + lib_rand_srand(Q1, 1234567) + lib_rand_srand(Q2, 7654321) + for (i=1; i<=10; i++) { + q1 += lib_rand_rand(Q1) + q2 += lib_rand_rand(Q2) + } + + print "q1 sum: " q1 + print "q2 sum: " q2 +} diff --git a/src/example_apps/10_run/Makefile b/src/example_apps/10_run/Makefile new file mode 100644 index 0000000..d531aea --- /dev/null +++ b/src/example_apps/10_run/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. + +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/10_run/app.c b/src/example_apps/10_run/app.c new file mode 100644 index 0000000..a62edc7 --- /dev/null +++ b/src/example_apps/10_run/app.c @@ -0,0 +1,33 @@ +#include +#include + +/* + Purpose: load and run a script using the command line syntax of mawk but + using a virtual stdin buffer instead of the real stdin. 
+ Run: ./app -f test.awk +*/ + +int main(int argc, char **argv) +{ + mawk_state_t *m; + + /* init a context, execute BEGIN */ + m = libmawk_initialize(argc, argv); + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* feed in some data on the virtual stdin */ + libmawk_append_input(m, "This is a\nmultiline test input\nfor the artificial input buffer.\n"); + + /* run the MAIN part of the script as long as there's data in the buffer of + the virtual stdin */ + libmawk_run_main(m); + + /* run END and free the context */ + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/10_run/out.ref b/src/example_apps/10_run/out.ref new file mode 100644 index 0000000..e12d80b --- /dev/null +++ b/src/example_apps/10_run/out.ref @@ -0,0 +1,5 @@ +script: BEGIN +script: input: "This is a" +script: input: "multiline test input" +script: input: "for the artificial input buffer." +script: END diff --git a/src/example_apps/10_run/test.awk b/src/example_apps/10_run/test.awk new file mode 100644 index 0000000..620a698 --- /dev/null +++ b/src/example_apps/10_run/test.awk @@ -0,0 +1,3 @@ +BEGIN { print "script: BEGIN" } + { print "script: input: \"" $0 "\"" } +END { print "script: END" } diff --git a/src/example_apps/12_input/Makefile b/src/example_apps/12_input/Makefile new file mode 100644 index 0000000..d531aea --- /dev/null +++ b/src/example_apps/12_input/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. 
+ +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/12_input/app.c b/src/example_apps/12_input/app.c new file mode 100644 index 0000000..6093122 --- /dev/null +++ b/src/example_apps/12_input/app.c @@ -0,0 +1,79 @@ +#include +#include + +/* +(this test is broken yet, waiting for vio to be finished) + Purpose: demonstrate how virtual stdin works + Run: ./app -f test.awk +*/ + +int main(int argc, char **argv) +{ + mawk_state_t *m; + + /* init a context, execute BEGIN */ + m = libmawk_initialize(argc, argv); + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* run the MAIN part of the script; assume a record is a line + (with standard FS in awk this is true). The MAIN part of + the script consists of _all_ rules except for BEGINs and ENDs. */ + + /* The input buffer of the script is empty at start. Attempting to run + MAIN on an empty buffer will return immediately, without any awk + code being executed */ + printf("app: test point 1\n"); + libmawk_run_main(m); + + /* Load a full record and invoke MAIN; this will run rules once, for this + line: */ + printf("app: test point 2\n"); + libmawk_append_input(m, "First line.\n"); + libmawk_run_main(m); + + /* Load a multiple records and invoke MAIN; this will run the rules + for all full (terminated) records: + */ + printf("app: test point 3\n"); + libmawk_append_input(m, "Second line.\nThird line.\nFourth "); + libmawk_run_main(m); + + /* At this point we have a partial record ("Fourth ") left in the + buffer. 
Running MAIN without terminating this record will have + the same effect as running MAIN on an empty buffer: + */ + printf("app: test point 4\n"); + libmawk_run_main(m); + + /* Terminate the incomplete record and run: will invoke rules on the + now-complete record */ + printf("app: test point 5\n"); + libmawk_append_input(m, "line.\n"); + libmawk_run_main(m); + + + /* the number of append calls does not matter: */ + printf("app: test point 6\n"); + libmawk_append_input(m, "5th line.\n"); + libmawk_append_input(m, "6th line.\n7th "); + libmawk_append_input(m, "line.\n"); + libmawk_run_main(m); + + /* if there is a partial record at the end of the input stream, that + is read and processed as a full record */ + printf("app: test point 7\n"); + libmawk_append_input(m, "partial 8th 'line'\n"); + libmawk_close_input(m); + libmawk_run_main(m); + + /* free the context - this won't run END because END had to run already + for eof-on-stdin */ + printf("app: test point 8\n"); + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/12_input/out.ref b/src/example_apps/12_input/out.ref new file mode 100644 index 0000000..9d93fc5 --- /dev/null +++ b/src/example_apps/12_input/out.ref @@ -0,0 +1,18 @@ +script: BEGIN +app: test point 1 +app: test point 2 +script: input: "First line." +app: test point 3 +script: input: "Second line." +script: input: "Third line." +app: test point 4 +app: test point 5 +script: input: "Fourth line." +app: test point 6 +script: input: "5th line." +script: input: "6th line." +script: input: "7th line." 
+app: test point 7 +script: input: "partial 8th 'line'" +script: END +app: test point 8 diff --git a/src/example_apps/12_input/test.awk b/src/example_apps/12_input/test.awk new file mode 100644 index 0000000..620a698 --- /dev/null +++ b/src/example_apps/12_input/test.awk @@ -0,0 +1,3 @@ +BEGIN { print "script: BEGIN" } + { print "script: input: \"" $0 "\"" } +END { print "script: END" } diff --git a/src/example_apps/12_multi/Makefile b/src/example_apps/12_multi/Makefile new file mode 100644 index 0000000..d531aea --- /dev/null +++ b/src/example_apps/12_multi/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. + +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/12_multi/app.c b/src/example_apps/12_multi/app.c new file mode 100644 index 0000000..a2442e9 --- /dev/null +++ b/src/example_apps/12_multi/app.c @@ -0,0 +1,56 @@ +#include +#include + +/* + Purpose: load the same script multiple times, running the instances + in parallel with different input. There's no race condition on + the output: scripts execution is strictly dictated by the + (single thread) app and both scripts are writing the same output + stream. 
+ Run: ./app -f test.awk +*/ + +int main(int argc, char **argv) +{ + mawk_state_t *m1, *m2; + int n; + + /* init a context, execute BEGIN */ + m1 = libmawk_initialize(argc, argv); + m2 = libmawk_initialize(argc, argv); + if ((m1 == NULL) || (m2 == NULL)) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* feed in some data on the virtual stdin */ + libmawk_append_input(m1, "[1] This is a\n[1] multiline test input\n[1] for the artificial input buffer.\n"); + libmawk_append_input(m2, "[2] This is a\n[2] multiline test input\n[2] for the artificial input buffer.\n"); + + /* run the MAIN part of the script as long as there's data in the buffer of + the virtual stdin; this makes m1 process all available input before m2 + starts processing its input. */ + libmawk_run_main(m1); + libmawk_run_main(m2); + + /* run in parallel, record by record - inject only one record in the input + buffer before running the script */ + for(n = 0; n < 4; n++) { + char tmp[32]; + + sprintf(tmp, "[1] record %d\n", n); + libmawk_append_input(m1, tmp); + libmawk_run_main(m1); + + sprintf(tmp, "[2] record %d\n", n); + libmawk_append_input(m2, tmp); + libmawk_run_main(m2); + } + + /* run END and free the context */ + libmawk_uninitialize(m1); + libmawk_uninitialize(m2); + + return 0; +} + diff --git a/src/example_apps/12_multi/out.ref b/src/example_apps/12_multi/out.ref new file mode 100644 index 0000000..f4b8673 --- /dev/null +++ b/src/example_apps/12_multi/out.ref @@ -0,0 +1,18 @@ +script: BEGIN +script: BEGIN +script: input: "[1] This is a" +script: input: "[1] multiline test input" +script: input: "[1] for the artificial input buffer." +script: input: "[2] This is a" +script: input: "[2] multiline test input" +script: input: "[2] for the artificial input buffer." 
+script: input: "[1] record 0" +script: input: "[2] record 0" +script: input: "[1] record 1" +script: input: "[2] record 1" +script: input: "[1] record 2" +script: input: "[2] record 2" +script: input: "[1] record 3" +script: input: "[2] record 3" +script: END +script: END diff --git a/src/example_apps/12_multi/test.awk b/src/example_apps/12_multi/test.awk new file mode 100644 index 0000000..620a698 --- /dev/null +++ b/src/example_apps/12_multi/test.awk @@ -0,0 +1,3 @@ +BEGIN { print "script: BEGIN" } + { print "script: input: \"" $0 "\"" } +END { print "script: END" } diff --git a/src/example_apps/15_call/Makefile b/src/example_apps/15_call/Makefile new file mode 100644 index 0000000..b96975b --- /dev/null +++ b/src/example_apps/15_call/Makefile @@ -0,0 +1,24 @@ +ROOT=../.. + +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + + diff --git a/src/example_apps/15_call/app.c b/src/example_apps/15_call/app.c new file mode 100644 index 0000000..a01bc59 --- /dev/null +++ b/src/example_apps/15_call/app.c @@ -0,0 +1,65 @@ +#include +#include + +/* + Purpose: demonstrate how to call an awk user function from the app + Run: ./app -f test.awk +*/ + +int main(int argc, char **argv) +{ + mawk_state_t *m; + mawk_cell_t ret = libmawk_empty_cell; + + /* init a context, execute BEGIN */ + m = libmawk_initialize(argc, argv); + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* call user function foo with 3 arguments; the "format string" is similar + to printf's without "%": d (int), f (float) and s (\0 terminated string) + The actual arguments are taken as varargs. This function is designed + for static calls. 
+ NOTE: expect the function to return; that is, the function may not getline + from FIFOs, as that may cause them to return as "interrupted waiting + for input" in which case the app should fill the FIFO and resume + running the function. + */ + if (libmawk_call_function(m, "foo", &ret, "dfs", (int)42, (double)1.234, (char *)"test string1.") == MAWK_EXER_FUNCRET) { + char buff[32]; + printf("app: return value of foo is '%s'\n", libmawk_print_cell(m, &ret, buff, sizeof(buff))); + libmawk_cell_destroy(m, &ret); + } + else { + printf("app: error: function foo didn't return\n"); + goto quit; + } + + /* this is the same function call with a syntax more suitable for dynamic + calls. Same limitation on getline applies. */ + { + int i = 42; + double d = 1.234; + char *s = "test string2."; + void *args[] = {&i, &d, s}; + if (libmawk_call_functionp(m, "foo", &ret, "dfs", args) == MAWK_EXER_FUNCRET) { + char buff[32]; + printf("app: return value of func foo '%s'\n", libmawk_print_cell(m, &ret, buff, sizeof(buff))); + libmawk_cell_destroy(m, &ret); + } + else { + printf("app: error: function foo didn't return\n"); + goto quit; + } + } + + +quit:; + /* run END and free the context */ + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/15_call/out.ref b/src/example_apps/15_call/out.ref new file mode 100644 index 0000000..153e69c --- /dev/null +++ b/src/example_apps/15_call/out.ref @@ -0,0 +1,6 @@ +script: BEGIN +script: foo(42,1.234,test string1.) +app: return value of foo is '43.234000' +script: foo(42,1.234,test string2.) 
+app: return value of func foo '43.234000' +script: END diff --git a/src/example_apps/15_call/test.awk b/src/example_apps/15_call/test.awk new file mode 100644 index 0000000..8c37e7d --- /dev/null +++ b/src/example_apps/15_call/test.awk @@ -0,0 +1,9 @@ +function foo(a, b, c) +{ + print "script: foo(" a "," b "," c ")" + return a+b +} + +BEGIN { print "script: BEGIN" } + { print "script: input: \"" $0 "\"" } +END { print "script: END" } diff --git a/src/example_apps/15_get_array/Makefile b/src/example_apps/15_get_array/Makefile new file mode 100644 index 0000000..d531aea --- /dev/null +++ b/src/example_apps/15_get_array/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. + +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/15_get_array/app.c b/src/example_apps/15_get_array/app.c new file mode 100644 index 0000000..deed2fa --- /dev/null +++ b/src/example_apps/15_get_array/app.c @@ -0,0 +1,62 @@ +#include +#include + +/* + Purpose: demonstrate how to get the current value of an existing array index; + Run: ./app -f test.awk +*/ + +void print_bar(mawk_state_t *m) +{ + mawk_cell_t c = libmawk_empty_cell; + char buff[32]; + + /* get the value of bar["wow"] into &c; the last 0 in the call means + the index should not be created in the array if it does not exist. + The array is not created if it didn't exist but -1 is returned. + NOTE: c must be initialized empty because it gets destroyed in + the call. 
*/ + switch(libmawk_get_array_at(m, "bar", "wow", &c, 0)) { + case -1: + printf("No such array \"bar[]\"\n"); + break; + case 0: + printf("No \"wow\" in array \"bar[]\"\n"); + break; + case 1: + printf("app: bar = '%s'\n", libmawk_print_cell(m, &c, buff, sizeof(buff))); + libmawk_cell_destroy(m, &c); + } +} + +int main(int argc, char **argv) +{ + mawk_state_t *m; + + /* init a context, execute BEGIN */ + m = libmawk_initialize(argc, argv); + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* print value of bar right after BEGIN */ + print_bar(m); + + /* feed in some data on the virtual stdin */ + libmawk_append_input(m, "This is a\nmultiline test input\nfor the artificial input buffer.\n"); + + /* run the MAIN part of the script as long as there's data in the buffer of + the virtual stdin */ + libmawk_run_main(m); + + /* print value of bar after running the script on a few records */ + print_bar(m); + + + /* run END and free the context */ + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/15_get_array/out.ref b/src/example_apps/15_get_array/out.ref new file mode 100644 index 0000000..11004ff --- /dev/null +++ b/src/example_apps/15_get_array/out.ref @@ -0,0 +1,7 @@ +script: BEGIN +app: bar = '42' +script: input: "This is a" +script: input: "multiline test input" +script: input: "for the artificial input buffer." 
+app: bar = '45' +script: END diff --git a/src/example_apps/15_get_array/test.awk b/src/example_apps/15_get_array/test.awk new file mode 100644 index 0000000..de1850c --- /dev/null +++ b/src/example_apps/15_get_array/test.awk @@ -0,0 +1,13 @@ +BEGIN { + print "script: BEGIN" + bar["wow"] = 42 +} + +{ + print "script: input: \"" $0 "\"" + bar["wow"]++ +} + +END { + print "script: END" +} diff --git a/src/example_apps/15_get_scalar/Makefile b/src/example_apps/15_get_scalar/Makefile new file mode 100644 index 0000000..d531aea --- /dev/null +++ b/src/example_apps/15_get_scalar/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. + +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/15_get_scalar/app.c b/src/example_apps/15_get_scalar/app.c new file mode 100644 index 0000000..1bb289a --- /dev/null +++ b/src/example_apps/15_get_scalar/app.c @@ -0,0 +1,56 @@ +#include +#include + +/* + Purpose: demonstrate how to get the current value of a variable; + this basic example does not cover how to efficiently use + the value (the cell of the variable), only converts it to + string for printing. Please refer to examples TODO for + more details on cell conversions. 
+ Run: ./app -f test.awk +*/ + +void print_bar(mawk_state_t *m) +{ + const mawk_cell_t *c; + char buff[32]; + + c = libmawk_get_var(m, "bar"); + + if (c != NULL) + printf("app: bar = '%s'\n", libmawk_print_cell(m, c, buff, sizeof(buff))); + else + printf("No such variable \"bar\"\n"); +} + +int main(int argc, char **argv) +{ + mawk_state_t *m; + + /* init a context, execute BEGIN */ + m = libmawk_initialize(argc, argv); + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* print value of bar right after BEGIN */ + print_bar(m); + + /* feed in some data on the virtual stdin */ + libmawk_append_input(m, "This is a\nmultiline test input\nfor the artificial input buffer.\n"); + + /* run the MAIN part of the script as long as there's data in the buffer of + the virtual stdin */ + libmawk_run_main(m); + + /* print value of bar after running the script on a few records */ + print_bar(m); + + + /* run END and free the context */ + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/15_get_scalar/out.ref b/src/example_apps/15_get_scalar/out.ref new file mode 100644 index 0000000..11004ff --- /dev/null +++ b/src/example_apps/15_get_scalar/out.ref @@ -0,0 +1,7 @@ +script: BEGIN +app: bar = '42' +script: input: "This is a" +script: input: "multiline test input" +script: input: "for the artificial input buffer." +app: bar = '45' +script: END diff --git a/src/example_apps/15_get_scalar/test.awk b/src/example_apps/15_get_scalar/test.awk new file mode 100644 index 0000000..a8bfe11 --- /dev/null +++ b/src/example_apps/15_get_scalar/test.awk @@ -0,0 +1,13 @@ +BEGIN { + print "script: BEGIN" + bar = 42 +} + +{ + print "script: input: \"" $0 "\"" + bar++ +} + +END { + print "script: END" +} diff --git a/src/example_apps/20_init_wired/Makefile b/src/example_apps/20_init_wired/Makefile new file mode 100644 index 0000000..100d1d7 --- /dev/null +++ b/src/example_apps/20_init_wired/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. 
+ +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/20_init_wired/app.c b/src/example_apps/20_init_wired/app.c new file mode 100644 index 0000000..dc41068 --- /dev/null +++ b/src/example_apps/20_init_wired/app.c @@ -0,0 +1,31 @@ +#include +#include + +/* + Purpose: ignore command line args and load a script from a hardwired path + Run: ./app +*/ + +int main(int argc, char **argv) +{ + mawk_state_t *m; + + /* init a context in stages */ + m = libmawk_initialize_stage1(); /* alloc context */ + libmawk_initialize_stdio(m, 0, 1, 1); /* set up default stdio: stdin is a pipe, stdout and stderr are bound to the app's stdout and stderr with no-close-on-exit */ + mawk_append_input_file(m, "test.awk", 0); /* force load test.awk */ + m = libmawk_initialize_stage2(m, 0, NULL); /* set up with no arguments */ + m = libmawk_initialize_stage3(m); /* execute BEGIN */ + + + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* run END and free the context */ + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/20_init_wired/out.ref b/src/example_apps/20_init_wired/out.ref new file mode 100644 index 0000000..8787538 --- /dev/null +++ b/src/example_apps/20_init_wired/out.ref @@ -0,0 +1,2 @@ +script: BEGIN test.awk +script: END test.awk diff --git a/src/example_apps/20_init_wired/test.awk b/src/example_apps/20_init_wired/test.awk new file mode 100644 index 0000000..a95c791 --- /dev/null +++ b/src/example_apps/20_init_wired/test.awk @@ -0,0 +1,2 @@ +BEGIN { print "script: BEGIN test.awk" } +END { print "script: END test.awk" } diff --git a/src/example_apps/20_uninit_staged/Makefile b/src/example_apps/20_uninit_staged/Makefile new 
file mode 100644 index 0000000..d531aea --- /dev/null +++ b/src/example_apps/20_uninit_staged/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. + +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/20_uninit_staged/app.c b/src/example_apps/20_uninit_staged/app.c new file mode 100644 index 0000000..4b05936 --- /dev/null +++ b/src/example_apps/20_uninit_staged/app.c @@ -0,0 +1,44 @@ +#include +#include + +/* + Purpose: multi-stage uninit: collect results calculated in the END {} block + Run: ./app -f test.awk +*/ + +int main(int argc, char **argv) +{ + mawk_state_t *m; + + /* init a context, execute BEGIN */ + m = libmawk_initialize(argc, argv); + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* run END */ + libmawk_uninitialize_stage1(m); + + /* print variable "script_state" */ + { + const mawk_cell_t *c; + char buff[32]; + c = libmawk_get_var(m, "script_state"); + if (c != NULL) + printf("app: script_state = '%s'\n", libmawk_print_cell(m, c, buff, sizeof(buff))); + else + printf("No such variable \"script_state\"\n"); + } + + /* free the context */ + libmawk_uninitialize_stage2(m); + + return 0; +} + diff --git a/src/example_apps/20_uninit_staged/out.ref b/src/example_apps/20_uninit_staged/out.ref new file mode 100644 index 0000000..f35de60 --- /dev/null +++ b/src/example_apps/20_uninit_staged/out.ref @@ -0,0 +1,3 @@ +script: BEGIN test.awk +script: END test.awk +app: script_state = 'begin+end' diff --git a/src/example_apps/20_uninit_staged/test.awk b/src/example_apps/20_uninit_staged/test.awk new file mode 100644 index 
0000000..cc92e1f --- /dev/null +++ b/src/example_apps/20_uninit_staged/test.awk @@ -0,0 +1,4 @@ +BEGIN { print "script: BEGIN test.awk"; script_state = "begin" } + { print "script: input"; script_state = script_state "+input" } +END { print "script: END test.awk"; script_state = script_state "+end" } + diff --git a/src/example_apps/25_set_array/Makefile b/src/example_apps/25_set_array/Makefile new file mode 100644 index 0000000..ff4a919 --- /dev/null +++ b/src/example_apps/25_set_array/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. + +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk | sort + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/25_set_array/app.c b/src/example_apps/25_set_array/app.c new file mode 100644 index 0000000..d853b45 --- /dev/null +++ b/src/example_apps/25_set_array/app.c @@ -0,0 +1,41 @@ +#include +#include + +/* + Purpose: demonstrate how to change the value of an existing array + Run: ./app -f test.awk | sort +*/ + +int main(int argc, char **argv) +{ + mawk_state_t *m; + double dbl; + + /* init a context, execute BEGIN */ + m = libmawk_initialize(argc, argv); + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* setting an index in an array means existing data is overwritten + or new data is allocated/registered. There are calls with different + conventions doing the same, to make the app developer's life easier. 
*/ + + /* Vararg version, for static/hardwired data: */ + libmawk_set_array_at(m, "bar", "one", 'd', 42); + + /* Similar version with a void * pointer as value: */ + dbl=3.141592654; + libmawk_set_array_atp(m, "bar", "two", 'f', &dbl); + + /* The above two were creating new indixes in the array; the below call + will overwrite an existing one created by the script */ + libmawk_set_array_at(m, "bar", "wow", 's', "no way!"); + + /* run END and free the context */ + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/25_set_array/out.ref b/src/example_apps/25_set_array/out.ref new file mode 100644 index 0000000..218758b --- /dev/null +++ b/src/example_apps/25_set_array/out.ref @@ -0,0 +1,3 @@ +script: one 42 +script: two 3.14159 +script: wow no way! diff --git a/src/example_apps/25_set_array/test.awk b/src/example_apps/25_set_array/test.awk new file mode 100644 index 0000000..ab58a96 --- /dev/null +++ b/src/example_apps/25_set_array/test.awk @@ -0,0 +1,8 @@ +BEGIN { + bar["wow"] = 42 +} + +END { + for(idx in bar) + print "script:", idx, bar[idx] +} diff --git a/src/example_apps/30_out_pipes/Makefile b/src/example_apps/30_out_pipes/Makefile new file mode 100644 index 0000000..d531aea --- /dev/null +++ b/src/example_apps/30_out_pipes/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. 
+ +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/30_out_pipes/app.c b/src/example_apps/30_out_pipes/app.c new file mode 100644 index 0000000..b4a150d --- /dev/null +++ b/src/example_apps/30_out_pipes/app.c @@ -0,0 +1,56 @@ +#include +#include + +/* + Purpose: wire stdout to a pipe and process the output of the script; + simple setup + Run: ./app -f test.awk +*/ + +void print_pipe_pending(mawk_state_t *m, mawk_vio_t *vf, char *sep) +{ + for(;;) { + int len; + char buf[1024]; + len = mawk_vio_fifo_read_app(m, vf, buf, sizeof(buf)-1); + if (len <= 0) + return; + buf[len] = '\0'; + printf("<%s>\n%s\n", sep, buf, sep); + } +} + +int main(int argc, char **argv) +{ + mawk_state_t *m; + mawk_vio_t *vf_stdin, *vf_stdout, *vf_log; + + /* init a context in stages */ + m = libmawk_initialize_stage1(); /* alloc context */ + + /* set up pipes: stdin is a fifo, stdout is a fifo, stderr is app's stderr */ + libmawk_initialize_stdio(m, 0, 0, 1); + + m = libmawk_initialize_stage2(m, argc, argv); /* set up with CLI arguments */ + m = libmawk_initialize_stage3(m); /* execute BEGIN */ + + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* libmawk_append_input() operates on "/dev/stdin" as registered, if it is + a pipe, so it is compatible with manual setup */ + libmawk_append_input(m, "Hello world!\n"); + libmawk_run_main(m); + + + /* print all the stdout the script produced so far */ + print_pipe_pending(m, m->fnode_stdout->vf, "stdout"); + + /* run END and free the context */ + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/30_out_pipes/out.ref b/src/example_apps/30_out_pipes/out.ref new file mode 100644 
index 0000000..32b1252 --- /dev/null +++ b/src/example_apps/30_out_pipes/out.ref @@ -0,0 +1,4 @@ + +BEGIN test.awk +stdin=Hello world! + diff --git a/src/example_apps/30_out_pipes/test.awk b/src/example_apps/30_out_pipes/test.awk new file mode 100644 index 0000000..2ab3d5a --- /dev/null +++ b/src/example_apps/30_out_pipes/test.awk @@ -0,0 +1,9 @@ +BEGIN { + print "BEGIN test.awk" + print "this is the initial log entry." > "log" + print "foobar" > "real_file" +} + +{ + print "stdin=" $0 +} diff --git a/src/example_apps/31_out_pipes_manual/Makefile b/src/example_apps/31_out_pipes_manual/Makefile new file mode 100644 index 0000000..d531aea --- /dev/null +++ b/src/example_apps/31_out_pipes_manual/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. + +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/31_out_pipes_manual/app.c b/src/example_apps/31_out_pipes_manual/app.c new file mode 100644 index 0000000..d0554a5 --- /dev/null +++ b/src/example_apps/31_out_pipes_manual/app.c @@ -0,0 +1,64 @@ +#include +#include + +/* + Purpose: wire stdout to a pipe and process the output of the script and + hijack output file "log". Manual setup. 
+ Run: ./app -f test.awk +*/ + +void print_pipe_pending(mawk_state_t *m, mawk_vio_t *vf, char *sep) +{ + for(;;) { + int len; + char buf[1024]; + len = mawk_vio_fifo_read_app(m, vf, buf, sizeof(buf)-1); + if (len <= 0) + return; + buf[len] = '\0'; + printf("<%s>\n%s\n", sep, buf, sep); + } +} + +int main(int argc, char **argv) +{ + mawk_state_t *m; + mawk_vio_t *vf_stdin, *vf_stdout, *vf_log; + + /* init a context in stages */ + m = libmawk_initialize_stage1(); /* alloc context */ + + /* set up all pipes */ + mawk_vio_orig_setup_stdio(m, 0, 0, 1); /* whether bind to the app's stdio: 0,0,1=stdin,stdout,stderr; let stderr bind */ + vf_stdin = mawk_vio_fifo_open(m, NULL, MAWK_VIO_I); /* create a pipe for stdin */ + vf_stdout = mawk_vio_fifo_open(m, NULL, MAWK_VIO_O_APPEND); /* create a pipe for stdout */ + vf_log = mawk_vio_fifo_open(m, NULL, MAWK_VIO_O_APPEND); /* create a pipe for logging */ + mawk_file_register(m, "/dev/stdin", F_IN, vf_stdin); /* register /dev/stdin */ + mawk_file_register(m, "/dev/stdout", F_APPEND, vf_stdout); /* register /dev/stdout */ + mawk_file_register(m, "log", F_APPEND, vf_log); /* register a regular open file named "log" */ + m->vio_init = mawk_vio_orig_init; /* file operation is handled by the orig vio */ + + m = libmawk_initialize_stage2(m, argc, argv); /* set up with CLI arguments */ + m = libmawk_initialize_stage3(m); /* execute BEGIN */ + + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* libmawk_append_input() operates on "/dev/stdin" as registered, if it is + a pipe, so it is compatible with manual setup */ + libmawk_append_input(m, "Hello world!\n"); + libmawk_run_main(m); + + + /* print all the stdout the script produced so far */ + print_pipe_pending(m, vf_stdout, "stdout"); + print_pipe_pending(m, vf_log, "log"); + + /* run END and free the context */ + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/31_out_pipes_manual/out.ref 
b/src/example_apps/31_out_pipes_manual/out.ref new file mode 100644 index 0000000..705e6bc --- /dev/null +++ b/src/example_apps/31_out_pipes_manual/out.ref @@ -0,0 +1,7 @@ + +BEGIN test.awk +stdin=Hello world! + + +this is the initial log entry. + diff --git a/src/example_apps/31_out_pipes_manual/test.awk b/src/example_apps/31_out_pipes_manual/test.awk new file mode 100644 index 0000000..2ab3d5a --- /dev/null +++ b/src/example_apps/31_out_pipes_manual/test.awk @@ -0,0 +1,9 @@ +BEGIN { + print "BEGIN test.awk" + print "this is the initial log entry." > "log" + print "foobar" > "real_file" +} + +{ + print "stdin=" $0 +} diff --git a/src/example_apps/50_runlimit/Makefile b/src/example_apps/50_runlimit/Makefile new file mode 100644 index 0000000..100d1d7 --- /dev/null +++ b/src/example_apps/50_runlimit/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. + +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/50_runlimit/app.c b/src/example_apps/50_runlimit/app.c new file mode 100644 index 0000000..8fff484 --- /dev/null +++ b/src/example_apps/50_runlimit/app.c @@ -0,0 +1,40 @@ +#include +#include + +/* + Purpose: run the script in small portions, only a few instruction at a time + using runlimit + Run: ./app +*/ + +int main(int argc, char **argv) +{ + mawk_state_t *m; + int n; + + /* init a context in stages */ + m = libmawk_initialize_stage1(); /* alloc context */ + libmawk_initialize_stdio(m, 0, 1, 1); /* set up default stdio: stdin is a pipe, stdout and stderr are bound to the app's stdout and stderr with no-close-on-exit */ + mawk_append_input_file(m, "test.awk", 0); /* force load test.awk */ + m = libmawk_initialize_stage2(m, 0, NULL); /* set up with no arguments */ + m->runlimit = 32; /* run 
32 instructions at a time */ + m = libmawk_initialize_stage3(m); /* start executing BEGIN */ + + + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* resume a few times */ + for(n = 0; n < 8; n++) { + printf("app: interrupt\n"); + mawk_resume(m); + } + + /* run END and free the context */ + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/50_runlimit/out.ref b/src/example_apps/50_runlimit/out.ref new file mode 100644 index 0000000..579b97a --- /dev/null +++ b/src/example_apps/50_runlimit/out.ref @@ -0,0 +1,34 @@ +script begin: 1 +script begin: 2 +app: interrupt +script begin: 3 +app: interrupt +script func ENTER +script func: 1 0.333333 +script func: 2 0.666667 +script func: 3 1 +app: interrupt +script func LEAVE +script begin: 4 +app: interrupt +script begin: 5 +script begin: 6 +app: interrupt +script func ENTER +script func: 1 0.333333 +script func: 2 0.666667 +script func: 3 1 +app: interrupt +script func: 4 1.33333 +script func: 5 1.66667 +script func: 6 2 +script func LEAVE +app: interrupt +script begin: 7 +script begin: 8 +app: interrupt +script begin: 9 +script func ENTER +script func: 1 0.333333 +script func: 2 0.666667 +script func: 3 1 diff --git a/src/example_apps/50_runlimit/test.awk b/src/example_apps/50_runlimit/test.awk new file mode 100644 index 0000000..e823e60 --- /dev/null +++ b/src/example_apps/50_runlimit/test.awk @@ -0,0 +1,18 @@ +function fnc() +{ + print "script func ENTER" + for(i in A) + print "script func:", i, A[i] + print "script func LEAVE" +} + +BEGIN { + while(1) { + n++ + print "script begin: " n + A[n] = n/3 + if ((n % 3) == 0) + fnc() + } +} + diff --git a/src/example_apps/51_blocking_fifo/Makefile b/src/example_apps/51_blocking_fifo/Makefile new file mode 100644 index 0000000..234b0d3 --- /dev/null +++ b/src/example_apps/51_blocking_fifo/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. 
+ +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -g -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/51_blocking_fifo/app.c b/src/example_apps/51_blocking_fifo/app.c new file mode 100644 index 0000000..b1b815b --- /dev/null +++ b/src/example_apps/51_blocking_fifo/app.c @@ -0,0 +1,50 @@ +#include +#include + +/* + Purpose: run the script in small portions and: + 1. starve stdin: the script will not be able to proceed + 2. resume record by record to test if getline in main works + Run: ./app +*/ + +int main(int argc, char **argv) +{ + mawk_state_t *m; + int n; + + /* init a context in stages */ + m = libmawk_initialize_stage1(); /* alloc context */ + libmawk_initialize_stdio(m, 0, 1, 1); /* set up default stdio: stdin is a pipe, stdout and stderr are bound to the app's stdout and stderr with no-close-on-exit */ + mawk_append_input_file(m, "test.awk", 0); /* force load test.awk */ + m = libmawk_initialize_stage2(m, 0, NULL); /* set up with no arguments */ + m->runlimit = 32; /* run 32 instructions at a time */ + m = libmawk_initialize_stage3(m); /* start executing BEGIN */ + + + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + printf("app: 1. starving: resume a few times without new input\n"); + for(n = 0; n < 8; n++) { + printf("app: interrupt\n"); + mawk_resume(m); + } + + printf("app: 2. 
resume a few times with one input record each time\n"); + for(n = 0; n < 16; n++) { + char s[32]; + printf("app: interrupt\n"); + sprintf(s, "rec %d\n", n); + libmawk_append_input(m, s); + mawk_resume(m); + } + + /* run END and free the context */ + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/51_blocking_fifo/out.ref b/src/example_apps/51_blocking_fifo/out.ref new file mode 100644 index 0000000..07c4c74 --- /dev/null +++ b/src/example_apps/51_blocking_fifo/out.ref @@ -0,0 +1,42 @@ +app: 1. starving: resume a few times without new input +app: interrupt +app: interrupt +app: interrupt +app: interrupt +app: interrupt +app: interrupt +app: interrupt +app: interrupt +app: 2. resume a few times with one input record each time +app: interrupt +script begin: 0 rec 0 +app: interrupt +script begin: 1 rec 1 +app: interrupt +script begin: 2 rec 2 +app: interrupt +script begin: 3 rec 3 +app: interrupt +script main: 5 rec 4 +app: interrupt +script main: 6 rec 5 +app: interrupt +script func: 0 rec 0 rec 6 +app: interrupt +script func: 1 rec 1 rec 7 +app: interrupt +script func: 2 rec 2 rec 8 +app: interrupt +script func: 3 rec 3 rec 9 +app: interrupt +script func: 5 rec 4 rec 10 +app: interrupt +script func: 6 rec 5 rec 11 +app: interrupt +script main: 7 rec 12 +app: interrupt +script main: 8 rec 13 +app: interrupt +script main: 9 rec 14 +app: interrupt +script main2: 9 rec 15 diff --git a/src/example_apps/51_blocking_fifo/test.awk b/src/example_apps/51_blocking_fifo/test.awk new file mode 100644 index 0000000..3ffbd98 --- /dev/null +++ b/src/example_apps/51_blocking_fifo/test.awk @@ -0,0 +1,28 @@ +function fnc( i,line) +{ + for(i in A) { + getline line + print "script func:", i, A[i], line + } +} + +BEGIN { + for(n = 0; n < 4; n++) { + getline line + print "script begin: ", n, line + A[n] = line + } +} + +{ + n++ + print "script main: ", n, $0 + A[n] = $0 + if (n == 6) + fnc() + else if (n > 8) { + getline line + print "script main2: ", n, line + } 
+} + diff --git a/src/example_apps/70_c_func/Makefile b/src/example_apps/70_c_func/Makefile new file mode 100644 index 0000000..d531aea --- /dev/null +++ b/src/example_apps/70_c_func/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. + +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/70_c_func/app.c b/src/example_apps/70_c_func/app.c new file mode 100644 index 0000000..01b3011 --- /dev/null +++ b/src/example_apps/70_c_func/app.c @@ -0,0 +1,59 @@ +#include +#include + +/* + Purpose: implement a C function that can be called from the script. + Run: ./app -f test.awk +*/ + +/* sp is the stack pointer, num_args is the number of arguments on the stack */ +mawk_cell_t *blobb(mawk_state_t *context, mawk_cell_t *sp, int num_args) +{ + int n; + char buff[64]; + + /* do something - print BLOBB and all arguments */ + printf("BLOBB! "); + for(n = 0; n < num_args; n++) + printf("arg%d='%s' ", n, libmawk_print_cell(context, libmawk_cfunc_arg(sp, num_args, n), buff, sizeof(buff))); + printf("\n"); + + /* set a return value (find out where the return value is on the stack, + using libmawk_cfunc_ret()) */ + libmawk_set_cell(context, libmawk_cfunc_ret(sp, num_args), 'f', (double)1234); + + /* return the new stack pointer - should be the one that was before + arguments had been pushed on the stack */ + return sp - num_args; +} + + +int main(int argc, char **argv) +{ + mawk_state_t *m; + + /* init a context, execute BEGIN */ + m = libmawk_initialize(argc, argv); + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* The function is registered after BEGIN is run by libmawk_initialize; + this means the script can not call the function from BEGIN.
If calling + from BEGIN is required, the 3-stage initialization shall be implemented, + the function registered between stage1 and stage2. */ + + /* register a C function (resolved runtime) */ + if (libmawk_register_function(m, "blobb", blobb) != 0) { + fprintf(stderr, "app: ERROR: Unable to register function blobb\n"); + return 1; + } + + + /* run END and free the context */ + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/70_c_func/out.ref b/src/example_apps/70_c_func/out.ref new file mode 100644 index 0000000..c7ac017 --- /dev/null +++ b/src/example_apps/70_c_func/out.ref @@ -0,0 +1,3 @@ +script: BEGIN +BLOBB! arg0='42.434445' arg1='oops' +script: END: 1234 diff --git a/src/example_apps/70_c_func/test.awk b/src/example_apps/70_c_func/test.awk new file mode 100644 index 0000000..90cfb1e --- /dev/null +++ b/src/example_apps/70_c_func/test.awk @@ -0,0 +1,2 @@ +BEGIN { print "script: BEGIN" } +END { print "script: END: " blobb(42.434445, "oops") } diff --git a/src/example_apps/71_c_func_call/Makefile b/src/example_apps/71_c_func_call/Makefile new file mode 100644 index 0000000..d531aea --- /dev/null +++ b/src/example_apps/71_c_func_call/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. 
+ +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/71_c_func_call/app.c b/src/example_apps/71_c_func_call/app.c new file mode 100644 index 0000000..28c67bd --- /dev/null +++ b/src/example_apps/71_c_func_call/app.c @@ -0,0 +1,71 @@ +#include +#include + +/* + Purpose: implement a C function that could be called from the script then + call it from C using the awk function call entry, as if it was an + awk function - this should work as there is no difference between + functions implemented in C and awk once they are properly registered + Run: ./app -f test.awk +*/ + +/* sp is the stack pointer, num_args is the number of arguments on the stack */ +mawk_cell_t *blobb(mawk_state_t *context, mawk_cell_t *sp, int num_args) +{ + int n; + char buff[64]; + + /* do something - print BLOBB and all arguments */ + printf("BLOBB! 
"); + for(n = 0; n < num_args; n++) + printf("arg%d='%s' ", n, libmawk_print_cell(context, libmawk_cfunc_arg(sp, num_args, n), buff, sizeof(buff))); + printf("\n"); + + /* set a return value (find out where the return value is on the stack, + using libmawk_cfunc_ret()) */ + libmawk_set_cell(context, libmawk_cfunc_ret(sp, num_args), 'f', (double)1234); + + /* return the new stack pointer - should be the one that was before + arguments had been pushed on the stack */ + return sp - num_args; +} + + +int main(int argc, char **argv) +{ + mawk_state_t *m; + mawk_cell_t ret = libmawk_empty_cell; + + /* init a context, execute BEGIN */ + m = libmawk_initialize(argc, argv); + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* The function is registered after BEGIN is run by libmawk_initialize; + this means the script can not call the function from BEGIN. If calling + from BEGIN is required, the 3-stage initialization shall be implemented, + the function registered between stage1 and stage2. */ + + /* register a C function (resolved runtime) */ + if (libmawk_register_function(m, "blobb", blobb) != 0) { + fprintf(stderr, "app: ERROR: Unable to register function blobb\n"); + return 1; + } + + if (libmawk_call_function(m, "blobb", &ret, "fs", 42.42, "hello") == MAWK_EXER_FUNCRET) { + char buff[32]; + printf("app: return value of func blobb '%s'\n", libmawk_print_cell(m, &ret, buff, sizeof(buff))); + libmawk_cell_destroy(m, &ret); + } + else + printf("app: error: function blobb didn't return\n"); + + + /* run END and free the context */ + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/71_c_func_call/out.ref b/src/example_apps/71_c_func_call/out.ref new file mode 100644 index 0000000..0ef495a --- /dev/null +++ b/src/example_apps/71_c_func_call/out.ref @@ -0,0 +1,5 @@ +script: BEGIN +BLOBB! arg0='42.420000' arg1='hello' +app: return value of func blobb '1234' +BLOBB! 
arg0='42.434445' arg1='oops' +script: END: 1234 diff --git a/src/example_apps/71_c_func_call/test.awk b/src/example_apps/71_c_func_call/test.awk new file mode 100644 index 0000000..90cfb1e --- /dev/null +++ b/src/example_apps/71_c_func_call/test.awk @@ -0,0 +1,2 @@ +BEGIN { print "script: BEGIN" } +END { print "script: END: " blobb(42.434445, "oops") } diff --git a/src/example_apps/90_custom_array/Makefile b/src/example_apps/90_custom_array/Makefile new file mode 100644 index 0000000..538ee1e --- /dev/null +++ b/src/example_apps/90_custom_array/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. + +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o carr.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/90_custom_array/app.c b/src/example_apps/90_custom_array/app.c new file mode 100644 index 0000000..dc1c92e --- /dev/null +++ b/src/example_apps/90_custom_array/app.c @@ -0,0 +1,39 @@ +#include +#include +#include "carr.h" + +/* + Purpose: create a virtual array that directly manipulates a C char[] without + any backing awk array. 
The actual array implementation is in carr.c + Run: ./app -f test.awk +*/ + +int main(int argc, char **argv) +{ + mawk_state_t *m; + int n; + + m = libmawk_initialize_stage1(); /* set up m */ + + /* set up all pipes */ + libmawk_initialize_stdio(m, 0, 1, 1); + + /* set up a new builtin array with side effects, before parsing scripts + so that it works from BEGIN */ + custom_array_init(m); + + m = libmawk_initialize_stage2(m, argc, argv); /* parse args loads the script(s) */ + m = libmawk_initialize_stage3(m); /* execute BEGIN {} */ + + /* Print the current state of the array */ + printf("app: CARR[]=\n"); + for(n = 0; n < CARR_SIZE; n++) + printf("app: [%d]=%d '%c'\n", n, carr[n], carr[n]); + + + /* run END and free the context */ + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/90_custom_array/carr.c b/src/example_apps/90_custom_array/carr.c new file mode 100644 index 0000000..f412efd --- /dev/null +++ b/src/example_apps/90_custom_array/carr.c @@ -0,0 +1,144 @@ +#include "carr.h" + +/* this example code demonstrates how to implement a virtual array that + manipulates a C array. The code aims for simplicity so there's no + locking of the C array, which is obviously common for any mawk instance: + the code assumes a single thread. + + Index is integer between 0 and CARR_SIZE-1. Delete only sets value to 0. + The iterator always goes between 0 and CARR_SIZE-1. + + */ + +char carr[CARR_SIZE]; + +/* lookup: copy a cell of a member if res != NULL; if create_flag is non-zero, + the index should be created. If result is non-NULL, it must be destroyed. + Result can be the same as cidx! + return: + -1 on error + 0 if index does not exist + 1 if it does exist + Because we have a static array, we won't ever return 0; also ignore the + create flag. Return -1 for boundary error. 
+ */ +int carr_find(mawk_state_t *MAWK, mawk_array_t A, const mawk_cell_t *cidx, mawk_cell_t *res, int create_flag) +{ + int idx; + idx = libmawk_cell2int(MAWK, cidx); + if ((idx < 0) || (idx >= CARR_SIZE)) + return -1; + + if (res != NULL) { + mawk_cell_destroy(MAWK, res); + res->type = C_NUM; + res->d.dval = carr[idx]; + } + return 1; +} + + +/* set the value of an element of the array. The val cell may have any type! */ +void carr_set(mawk_state_t *MAWK, mawk_array_t A, const mawk_cell_t *cidx, mawk_cell_t *val) +{ + int idx; + char cval; + + idx = libmawk_cell2int(MAWK, cidx); + if ((idx < 0) || (idx >= CARR_SIZE)) + return; + switch(val->type) { + case C_NUM: + case C_STRNUM: + case C_MBSTRN: + carr[idx] = libmawk_cell2int(MAWK, val); + break; + case C_STRING: + { + char buff[2]; + libmawk_print_cell(MAWK, val, buff, 2); + carr[idx] = buff[0]; + } + break; + default: + carr[idx] = 0; + } +} + + +/* delete a cell (since our array is static, set the value to 0) */ +static void carr_delete(mawk_state_t *MAWK, mawk_array_t A, const mawk_cell_t *idx) +{ + mawk_cell_t zero; + zero.type = C_NUM; + zero.d.dval = 0; + carr_set(MAWK, A, idx, &zero); +} + +/* set up an iterator; the iterator state is just an integer index */ +typedef struct { + int idx; + mawk_cell_t cidx; +} carr_it_t; + +void *carr_it_start(mawk_state_t *MAWK, mawk_array_t A) +{ + carr_it_t *i; + i = malloc(sizeof(carr_it_t)); + i->idx = -1; + i->cidx.type = C_NUM; + return i; +} + +/* return next element using an iterator (thread safe) */ +const mawk_cell_t *carr_it_next(mawk_state_t *MAWK, mawk_array_t A, void *iterator) +{ + carr_it_t *i = iterator; + i->idx++; + if (i->idx >= CARR_SIZE) + return NULL; + i->cidx.d.dval = i->idx; + return &i->cidx; +} + +/* finish iteration */ +void carr_it_stop(mawk_state_t *MAWK, mawk_array_t A, void *iterator) +{ + /* no need to destroy iterator->cidx as it was a number all the time */ + free(iterator); +} + +/* NOTE: the above code tries to be readable, thus 
there's a separate + integer index and a cell for storing the return-cell in the iterator. + The code could be shorter taking less ram if the iterator was a cell + whose (int)d.dval stored the index. */ + +/* the implementation struct that describes how the array is implemented */ +array_imp_t carr_imp = { + /* low level access: find an index, set and delete members */ + carr_find, + carr_set, + carr_delete, + + /* use the generic implementation for high level calls, because... */ + mawk_array_clear_generic, + mawk_array_loop_vector_generic, + mawk_array_load_generic, + + /* ... the low level iterator is implemented: */ + carr_it_start, + carr_it_next, + carr_it_stop +}; + +/* set up the array as a built-in */ +void custom_array_init(mawk_state_t *m) +{ + /* register the new array */ + libmawk_register_array(m, "CARR", &carr_imp); + + /* globals should be zero'd, but to make it absolutely clear: */ + memset(carr, 0, sizeof(carr)); +} + + diff --git a/src/example_apps/90_custom_array/carr.h b/src/example_apps/90_custom_array/carr.h new file mode 100644 index 0000000..0b3d2cd --- /dev/null +++ b/src/example_apps/90_custom_array/carr.h @@ -0,0 +1,5 @@ +#include + +#define CARR_SIZE 32 +extern char carr[CARR_SIZE]; +void custom_array_init(mawk_state_t *m); diff --git a/src/example_apps/90_custom_array/out.ref b/src/example_apps/90_custom_array/out.ref new file mode 100644 index 0000000..8923073 Binary files /dev/null and b/src/example_apps/90_custom_array/out.ref differ diff --git a/src/example_apps/90_custom_array/test.awk b/src/example_apps/90_custom_array/test.awk new file mode 100644 index 0000000..69751fd --- /dev/null +++ b/src/example_apps/90_custom_array/test.awk @@ -0,0 +1,16 @@ +BEGIN { +# fill in the array with "HELLO WORLD" from index 1; split() tries to fill +# the array with each word, but the virtual array set code will consider only +# the first character of each word + split("Haha Error Light Lungs Old Water Opponent Riot Long Diode !", CARR, " ") +
+# use numbers to fill in some more, leaving a gap between the end of the +# last index used by split() and the first used by this call + for(n = 14; n < 32; n++) + CARR[n] = 65+n-14 + +# delete [4], which is the second "L"; deletion means setting to 0 +# with this custom implementation since there is no way to really +# remove items from a static array + delete CARR[4] +} diff --git a/src/example_apps/90_custom_vio/Makefile b/src/example_apps/90_custom_vio/Makefile new file mode 100644 index 0000000..62d6b67 --- /dev/null +++ b/src/example_apps/90_custom_vio/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. + +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o vio_hash.o +CFLAGS = -I$(ROOT) -g + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/90_custom_vio/app.c b/src/example_apps/90_custom_vio/app.c new file mode 100644 index 0000000..e840b35 --- /dev/null +++ b/src/example_apps/90_custom_vio/app.c @@ -0,0 +1,54 @@ +#include +#include + +/* + Purpose: register "/dev/hash" to a custom vio that calculates some
+ The hash files are set up in a static manner: once the script + closes them or the files get eof from the app, they can not + be reopened to be the same virtual files (instead a reopen + would call the normal file open procedure) + Run: ./app -f test.awk +*/ + +int main(int argc, char **argv) +{ + mawk_state_t *m; + mawk_vio_t *vf; + + /* init a context in stages */ + m = libmawk_initialize_stage1(); /* alloc context */ + + /* set up all pipes */ + libmawk_initialize_stdio(m, 0, 1, 1); + + vf = mawk_vio_hash_open(m, NULL, MAWK_VIO_O_APPEND); /* create the hash vio */ + mawk_file_register(m, "/dev/hash", F_APPEND, vf); /* register for write */ + mawk_file_register(m, "/dev/hash", F_IN, vf); /* register for read */ + + printf("app: hash before begin: %lu\n", (unsigned long)mawk_vio_hash_val(m, vf)); + + m = libmawk_initialize_stage2(m, argc, argv); /* set up with CLI arguments */ + m = libmawk_initialize_stage3(m); /* execute BEGIN */ + + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + printf("app: hash after begin: %lu\n", (unsigned long)mawk_vio_hash_val(m, vf)); + + /* run END */ + libmawk_uninitialize_stage1(m); + + printf("app: hash after end: %lu\n", (unsigned long)mawk_vio_hash_val(m, vf)); + + /* need to release the app end of the deal to get everything free'd */ + mawk_vio_hash_eof_from_app(m, vf); + + /* free things */ + libmawk_uninitialize_stage2(m); + + return 0; +} + diff --git a/src/example_apps/90_custom_vio/out.ref b/src/example_apps/90_custom_vio/out.ref new file mode 100644 index 0000000..17279d4 --- /dev/null +++ b/src/example_apps/90_custom_vio/out.ref @@ -0,0 +1,5 @@ +app: hash before begin: 0 +script: BEGIN test.awk +app: hash after begin: 47330 +script: END: 2070754 +app: hash after end: 2070754 diff --git a/src/example_apps/90_custom_vio/test.awk b/src/example_apps/90_custom_vio/test.awk new file mode 100644 index 0000000..9cd2f3c --- /dev/null +++ b/src/example_apps/90_custom_vio/test.awk @@ -0,0 +1,10 @@ +BEGIN { + print "script: 
BEGIN test.awk" + print "Hello world!" > "/dev/hash" +} + +END { + print "end." > "/dev/hash" + getline val < "/dev/hash" + print "script: END: " val +} diff --git a/src/example_apps/90_custom_vio/vio_hash.c b/src/example_apps/90_custom_vio/vio_hash.c new file mode 100644 index 0000000..c8b1415 --- /dev/null +++ b/src/example_apps/90_custom_vio/vio_hash.c @@ -0,0 +1,187 @@ +/******************************************** +libmawk (C) 2014, Tibor 'Igor2' Palinkas; + +This is a source file for libmawk, an implementation of +the AWK programming language, fork of mawk. + +Libmawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ +#include +#include +#include "vio_hash.h" +#include "memory.h" + +typedef struct mawk_vio_hash_s { + /* this field is mandatory and must be the first */ + mawk_vio_t vio_common_head; + + /* vio implementation-specific fields */ + mawk_zfifo_t fifo; + int eof_from_awk; /* 1 if there won't be more from awk or awk won't accept more data (close()) */ + int eof_from_app; /* 1 if there won't be more from the app or the app won't accept more data */ + unsigned long int seed, value; +} mawk_vio_hash_t; + +/* create a new hash vio */ +mawk_vio_t *mawk_vio_hash_open(mawk_state_t *MAWK, const char *name, mawk_vio_open_mode_t mode) +{ + mawk_vio_hash_t *v; + + if ((mode != MAWK_VIO_O_APPEND) && (mode != MAWK_VIO_O_TRUNC)) + return NULL; + + v = mawk_zmalloc(MAWK, sizeof(mawk_vio_hash_t)); + v->vio_common_head.imp = &mawk_vio_hash_imp; + v->vio_common_head.refco = 0; + v->eof_from_awk = 0; + v->eof_from_app = 0; + v->seed = 0; + v->value = 0; + return (mawk_vio_t *)v; +} + +/* add a character to the hash */ +static void hash_putc(mawk_vio_hash_t *v, int val) +{ + val <<= v->seed % 32; + v->value ^= val; + v->seed++; +} + +/* putchar from awk */ +int mawk_vio_hash_putc(mawk_state_t *MAWK, mawk_vio_t *vf, char c) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + +
if (v->eof_from_app) + return -1; + + hash_putc(v, c); + return 1; +} + +/* write string from awk */ +int mawk_vio_hash_write_str(mawk_state_t *MAWK, mawk_vio_t *vf, const char *str) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + int len; + + if (v->eof_from_app) + return -1; + + for(len = 0; *str != '\0'; str++, len++) + hash_putc(v, *str); + return len; +} + +/* write bin from awk */ +int mawk_vio_hash_write(mawk_state_t *MAWK, mawk_vio_t *vf, const char *data, int len) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + int n; + + if (v->eof_from_app) + return -1; + + for(n = 0; n < len; n++) + hash_putc(v, data[n]); + return len; +} + +/* do not implement printf as there's no clever way doing it; let libmawk + do the sprintf() thing */ +int mawk_vio_hash_printf(mawk_state_t *MAWK, mawk_vio_t *vf, const char *fmt, ...) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + abort(); +} + +/* in case the file is read in awk, it should return the current hash value + and an eof (one-shot file). Sending eof is done by putting the file in + eof state in the first call and returning eof in the second. + + NOTE: this simple implementation won't allow the script to open the same + file again; mawk state's vio_init.open should be hooked for that + and a manual vf reopen shall be done. 
+ */ +int mawk_vio_hash_read(mawk_state_t *MAWK, mawk_vio_t *vf, char *dst, long int size) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + char buff[64]; + int l; + + if (v->eof_from_app) + return 0; + + l = sprintf(buff, "%lu\n", v->value); + if (l <= size) { + memcpy(dst, buff, l); + v->eof_from_app = 1; + return l; + } + return -1; +} + +static void close_on_eof(mawk_state_t *MAWK, mawk_vio_hash_t *v) +{ + /* free the struct only if both sides closed it */ + if ((v->eof_from_app) && (v->eof_from_awk)) + mawk_zfree(MAWK, v, sizeof(mawk_vio_hash_t)); +} + +/* the script closes the file */ +int mawk_vio_hash_close(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + + v->eof_from_awk = 1; + close_on_eof(MAWK, v); + return 0; +} + +int mawk_vio_hash_flush(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + return 0; /* nothing to do on flush */ +} + +int mawk_vio_hash_error(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + return (vf == NULL); +} + +void mawk_vio_hash_mark_no_close(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + /* fifos are always closed when both awk and the app closes them and + there are no inherited pipes anyway */ +} + +const mawk_vio_imp_t mawk_vio_hash_imp = { + mawk_vio_hash_putc, + mawk_vio_hash_write_str, + mawk_vio_hash_write, + mawk_vio_hash_printf, + mawk_vio_hash_read, + mawk_vio_hash_close, + mawk_vio_hash_flush, + mawk_vio_hash_error, + mawk_vio_hash_mark_no_close +}; + + +/* helper calls for the app */ +unsigned long int mawk_vio_hash_val(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + return v->value; +} + +/* the app wanted to close the file */ +int mawk_vio_hash_eof_from_app(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + + v->eof_from_app = 1; + close_on_eof(MAWK, v); + return 0; +} diff --git a/src/example_apps/90_custom_vio/vio_hash.h b/src/example_apps/90_custom_vio/vio_hash.h new file mode 100644 index 0000000..3badce0 --- /dev/null +++ 
b/src/example_apps/90_custom_vio/vio_hash.h @@ -0,0 +1,9 @@ +extern const mawk_vio_imp_t mawk_vio_hash_imp; + +mawk_vio_t *mawk_vio_hash_open(mawk_state_t *MAWK, const char *name, mawk_vio_open_mode_t mode); + +/* retrieve current value of the hash */ +unsigned long int mawk_vio_hash_val(mawk_state_t *MAWK, mawk_vio_t *vf); + +/* the application wants to signal eof to the script */ +int mawk_vio_hash_eof_from_app(mawk_state_t *MAWK, mawk_vio_t *vf); diff --git a/src/example_apps/90_fn_validation/Makefile b/src/example_apps/90_fn_validation/Makefile new file mode 100644 index 0000000..d531aea --- /dev/null +++ b/src/example_apps/90_fn_validation/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. + +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/90_fn_validation/app.c b/src/example_apps/90_fn_validation/app.c new file mode 100644 index 0000000..9fdd1d7 --- /dev/null +++ b/src/example_apps/90_fn_validation/app.c @@ -0,0 +1,69 @@ +#include +#include + +/* + Purpose: manipulate or deny opening specific files and commands + Run: ./app -f test.awk +*/ + +/* called any time the script wants to open a new file or command + (print redirection or getline); + return: + - orig_name if it is OK as is + - buff after filling in a new file name or command there + - another string const (won't be freed) + - NULL to deny opening the file/running the command */ + +const char *fn_rewrite(const char *orig_name, char *buff, int buff_size, int type) +{ + switch (type) { + case F_IN: + /* wants to read - allow any read-only op */ + return orig_name; + + case F_TRUNC: + case F_APPEND: + /* wants to write a file - redirect to out.txt */ + return "out.txt"; + + case PIPE_OUT: + case PIPE_IN: + { + static 
const char *wrapper = "echo '%s'"; /* unsafe: %s may contain '. */ + + /* run command - rewrite to use echo instead; measure the string with strlen() (sizeof would only give the pointer size) and add on the left side so the unsigned math can't underflow */ + if ((buff_size <= 0) || (strlen(orig_name) + strlen(wrapper) + 2 > (size_t)buff_size)) + return NULL; /* too long, we can't easily wrap, deny */ + + sprintf(buff, wrapper, orig_name); + return buff; + } + default: + /* ...if the API changes in the future: deny! */ + return NULL; + } +} + +int main(int argc, char **argv) +{ + mawk_state_t *m; + + /* init a context in stages */ + m = libmawk_initialize_stage1(); /* alloc context */ + libmawk_initialize_stdio(m, 1, 1, 1); + m->file_name_rewrite = fn_rewrite; /* hook file name for rewriting */ + m = libmawk_initialize_stage2(m, argc, argv); /* set up with CLI arguments */ + m = libmawk_initialize_stage3(m); /* execute BEGIN */ + + + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + /* run END and free the context */ + libmawk_uninitialize(m); + + return 0; +} + diff --git a/src/example_apps/90_fn_validation/out.ref b/src/example_apps/90_fn_validation/out.ref new file mode 100644 index 0000000..c1a35b9 --- /dev/null +++ b/src/example_apps/90_fn_validation/out.ref @@ -0,0 +1,3 @@ +script: BEGIN test.awk +script: read=BEGIN { +script: date=date +%s diff --git a/src/example_apps/90_fn_validation/test.awk b/src/example_apps/90_fn_validation/test.awk new file mode 100644 index 0000000..091f80a --- /dev/null +++ b/src/example_apps/90_fn_validation/test.awk @@ -0,0 +1,15 @@ +BEGIN { + print "script: BEGIN test.awk" + +# this will work as expected + getline line < "test.awk" + print "script: read=" line + +# this will be redirected to out.txt instead of "some_file" + print "hello" > "some_file" + +# this won't read the date because it's wrapped in an echo + "date +%s" | getline line + print "script: date=" line +} + diff --git a/src/example_apps/91_custom_vio_re/Makefile b/src/example_apps/91_custom_vio_re/Makefile new file mode 100644 index 0000000..6623710 --- /dev/null +++ 
b/src/example_apps/91_custom_vio_re/Makefile @@ -0,0 +1,23 @@ +ROOT=../.. + +all: app + +include $(ROOT)/libmawk/Makefile.conf +include ../Makefile.common + +OBJS = app.o vio_hash.o +CFLAGS = -I$(ROOT) + +app: $(OBJS) $(ROOT)/libmawk/libmawk.a + $(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB) + +app.o: app.c + +run: app + ./app -f test.awk + +clean: + rm -f $(OBJS) app + +distclean: clean + diff --git a/src/example_apps/91_custom_vio_re/app.c b/src/example_apps/91_custom_vio_re/app.c new file mode 100644 index 0000000..d675c0f --- /dev/null +++ b/src/example_apps/91_custom_vio_re/app.c @@ -0,0 +1,97 @@ +#include +#include +#include "vio_hash.h" + + +/* + Purpose: register "/dev/hash/N" to a custom vio that calculates some + sort of hash from all data written to it. + The hash files are set up in a dynamic manner: a hash is created + when the script opens it. It is possible to have at most 16 hashes. + Run: ./app -f test.awk +*/ + +/* any hash ever existed will get into this array and won't be freed until + the end of the app. This means if the script writes /dev/hash/3, the value + is preserved even after close("/dev/hash/3"), to allow multiple reads. + The hash value/seed is reset when the file is open for writing. 
*/ +mawk_vio_t *vf_hash[16]; + +mawk_vio_t *dispatch_open(mawk_state_t *MAWK, const char *name, mawk_vio_open_mode_t mode) +{ + /* hijack /dev/hash/N (N is 0..15) for the hash vio */ + if (strncmp(name, "/dev/hash/", 10) == 0) { + long idx; /* long, so the range check below sees what strtol() returned, untruncated */ + char *end; + idx = strtol(name+10, &end, 10); + if ((end == name+10) || (*end != '\0')) { /* empty id or trailing garbage */ + fprintf(stderr, "Warning: invalid hash %s: hash id not an integer\n", name); + return NULL; + } + if ((idx < 0) || (idx >= 16)) { + fprintf(stderr, "Warning: invalid hash %s: hash id out of range\n", name); + return NULL; + } + + /* allocate non-existing hashes or reopen existing ones */ + if (vf_hash[idx] == NULL) + vf_hash[idx] = mawk_vio_hash_open(MAWK, name, mode); + else + mawk_vio_hash_reopen(MAWK, vf_hash[idx], mode); + + return vf_hash[idx]; + } + + /* call the original open function for any other name */ + return mawk_vio_orig_init.vopen(MAWK, name, mode); +} + + +void print_all_hashes(mawk_state_t *m, char *banner) +{ + int n; + printf("app: %s\n", banner); + for(n = 0; n < 16; n++) { + if (vf_hash[n] != NULL) + printf(" /dev/hash/%d=%d\n", n, (int)mawk_vio_hash_val(m, vf_hash[n])); /* cast: the value is unsigned long, the format is %d */ + } +} + +int main(int argc, char **argv) +{ + mawk_state_t *m; + + + /* init a context in stages */ + m = libmawk_initialize_stage1(); /* alloc context */ + + m->vio_init.vopen = dispatch_open; + + /* set up all pipes */ + mawk_vio_orig_setup_stdio(m, 0, 1, 1); /* whether bind to the app's stdio: 0,1,1=stdin,stdout,stderr */ + + m = libmawk_initialize_stage2(m, argc, argv); /* set up with CLI arguments */ + m = libmawk_initialize_stage3(m); /* execute BEGIN */ + + if (m == NULL) { + fprintf(stderr, "libmawk_initialize failed, exiting\n"); + return 1; + } + + print_all_hashes(m, "hash after begin"); + + /* run END */ + libmawk_uninitialize_stage1(m); + + print_all_hashes(m, "hash after end"); + + /* free things */ + libmawk_uninitialize_stage2(m); + + /* no need to free vf_hash[]: + - uninitialize_stage2 will close all existing files + - final destruction of m will free any allocation 
left + */ + return 0; +} + diff --git a/src/example_apps/91_custom_vio_re/out.ref b/src/example_apps/91_custom_vio_re/out.ref new file mode 100644 index 0000000..3dbdaaa --- /dev/null +++ b/src/example_apps/91_custom_vio_re/out.ref @@ -0,0 +1,10 @@ +script: BEGIN test.awk +app: hash after begin + /dev/hash/4=47330 + /dev/hash/6=293858 +script: END: 2070754 6650850 +script: END: second time 6:6650850 +script: END: third time 6:21624 +app: hash after end + /dev/hash/4=2070754 + /dev/hash/6=21624 diff --git a/src/example_apps/91_custom_vio_re/test.awk b/src/example_apps/91_custom_vio_re/test.awk new file mode 100644 index 0000000..979f6b5 --- /dev/null +++ b/src/example_apps/91_custom_vio_re/test.awk @@ -0,0 +1,24 @@ +BEGIN { + print "script: BEGIN test.awk" + print "Hello world!" > "/dev/hash/4" + print "Hello universe!" > "/dev/hash/6" +} + +END { + print "end." > "/dev/hash/4" + print "END!" > "/dev/hash/6" + getline val4 < "/dev/hash/4" + getline val6 < "/dev/hash/6" + print "script: END: " val4, val6 + +# works even after reopening for read (the value is preserved): + close("/dev/hash/6") + getline val6 < "/dev/hash/6" + print "script: END: second time 6:" val6 + +# reopen for write: the hash starts over from a clean state + close("/dev/hash/6") + print "new thing" > "/dev/hash/6" + getline val6 < "/dev/hash/6" + print "script: END: third time 6:" val6 +} diff --git a/src/example_apps/91_custom_vio_re/vio_hash.c b/src/example_apps/91_custom_vio_re/vio_hash.c new file mode 100644 index 0000000..8719ca9 --- /dev/null +++ b/src/example_apps/91_custom_vio_re/vio_hash.c @@ -0,0 +1,201 @@ +/******************************************** +libmawk (C) 2014, Tibor 'Igor2' Palinkas; + +This is a source file for libmawk, an implementation of +the AWK programming language, fork of mawk. + +Libmawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ +#include +#include +#include "vio_hash.h" +#include "memory.h" + +typedef struct mawk_vio_hash_s { + const mawk_vio_imp_t *imp; + mawk_zfifo_t fifo; + int eof_from_awk; /* 1 if there won't be more from awk or awk won't accept more data (close()) */ + int eof_from_app; /* 1 if there won't be more from the app or the app won't accept more data */ + unsigned long int seed, value; +} mawk_vio_hash_t; + +/* create a new hash vio */ +mawk_vio_t *mawk_vio_hash_open(mawk_state_t *MAWK, const char *name, mawk_vio_open_mode_t mode) +{ + mawk_vio_hash_t *v; + + if ((mode != MAWK_VIO_O_APPEND) && (mode != MAWK_VIO_O_TRUNC)) + return NULL; + + v = mawk_zmalloc(MAWK, sizeof(mawk_vio_hash_t)); + v->imp = &mawk_vio_hash_imp; + v->eof_from_awk = 0; + v->eof_from_app = 0; + v->seed = 0; + v->value = 0; + return (mawk_vio_t *)v; +} + +/* add a character to the hash */ +static void hash_putc(mawk_vio_hash_t *v, int val) +{ + val <<= v->seed % 32; + v->value ^= val; + v->seed++; +} + +/* putchar from awk */ +int mawk_vio_hash_putc(mawk_state_t *MAWK, mawk_vio_t *vf, char c) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + + if (v->eof_from_app) + return -1; + + hash_putc(v, c); + return 1; +} + +/* write string from awk */ +int mawk_vio_hash_write_str(mawk_state_t *MAWK, mawk_vio_t *vf, const char *str) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + int len; + + if (v->eof_from_app) + return -1; + + for(len = 0; *str != '\0'; str++, len++) + hash_putc(v, *str); + return len; +} + +/* write bin from awk */ +int mawk_vio_hash_write(mawk_state_t *MAWK, mawk_vio_t *vf, const char *data, int len) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + int n; + + if (v->eof_from_app) + return -1; + + for(n = 0; n < len; n++) + hash_putc(v, data[n]); + return len; +} + +/* do not implement printf as there's no clever way doing it; let libmawk + do the sprintf() thing */ +int mawk_vio_hash_printf(mawk_state_t *MAWK, mawk_vio_t *vf, const char 
*fmt, ...) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + abort(); +} + +/* in case the file is read in awk, it should return the current hash value + and an eof (one-shot file). Sending eof is done by putting the file in + eof state in the first call and returning eof in the second. + + NOTE: this simple implementation won't allow the script to open the same + file again; mawk state's vio_init.open should be hooked for that + and a manual vf reopen shall be done. + */ +int mawk_vio_hash_read(mawk_state_t *MAWK, mawk_vio_t *vf, char *dst, long int size) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + char buff[64]; + int l; + + if (v->eof_from_app) + return 0; + + l = sprintf(buff, "%d\n", (int)v->value); /* value is unsigned long: cast to match %d */ + if (l <= size) { + memcpy(dst, buff, l); + v->eof_from_app = 1; + return l; + } + return -1; +} + +static void close_on_eof(mawk_state_t *MAWK, mawk_vio_hash_t *v) +{ + /* never free the struct - once open, it may be reopen and the host app + keeps a copy. Reset instead. */ +} + +/* the script closes the file */ +int mawk_vio_hash_close(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + + v->eof_from_awk = 1; + close_on_eof(MAWK, v); + return 0; +} + +int mawk_vio_hash_flush(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + return 0; /* nothing to do on flush, always succeeds */ +} + +int mawk_vio_hash_error(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + return (vf == NULL); +} + +void mawk_vio_hash_mark_no_close(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + /* fifos are always closed when both awk and the app closes them and + there are no inherited pipes anyway */ +} + +const mawk_vio_imp_t mawk_vio_hash_imp = { + mawk_vio_hash_putc, + mawk_vio_hash_write_str, + mawk_vio_hash_write, + mawk_vio_hash_printf, + mawk_vio_hash_read, + mawk_vio_hash_close, + mawk_vio_hash_flush, + mawk_vio_hash_error, + mawk_vio_hash_mark_no_close +}; + + +/* helper calls for the app */ +unsigned long int mawk_vio_hash_val(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + mawk_vio_hash_t *v = 
(mawk_vio_hash_t *)vf; + return v->value; +} + +/* the app wanted to close the file */ +int mawk_vio_hash_eof_from_app(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + + v->eof_from_app = 1; + close_on_eof(MAWK, v); + return 0; +} + +void mawk_vio_hash_reopen(mawk_state_t *MAWK, mawk_vio_t *vf, mawk_vio_open_mode_t mode) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + v->eof_from_awk = 0; + v->eof_from_app = 0; + + /* reopen for write (either append or trunc) means a new hash, make it clean! */ + if ((mode == MAWK_VIO_O_APPEND) || (mode == MAWK_VIO_O_TRUNC)) { + v->value = 0; + v->seed = 0; + } +} + +void mawk_vio_hash_free(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + mawk_vio_hash_t *v = (mawk_vio_hash_t *)vf; + mawk_zfree(MAWK, v, sizeof(mawk_vio_hash_t)); +} diff --git a/src/example_apps/91_custom_vio_re/vio_hash.h b/src/example_apps/91_custom_vio_re/vio_hash.h new file mode 100644 index 0000000..db0a3dd --- /dev/null +++ b/src/example_apps/91_custom_vio_re/vio_hash.h @@ -0,0 +1,17 @@ +extern const mawk_vio_imp_t mawk_vio_hash_imp; + + +mawk_vio_t *mawk_vio_hash_open(mawk_state_t *MAWK, const char *name, mawk_vio_open_mode_t mode); + +/* reopen an existing vf: reset eof states */ +void mawk_vio_hash_reopen(mawk_state_t *MAWK, mawk_vio_t *vf, mawk_vio_open_mode_t mode); + +/* free a hash struct */ +void mawk_vio_hash_free(mawk_state_t *MAWK, mawk_vio_t *vf); + +/* retrieve current value of the hash */ +unsigned long int mawk_vio_hash_val(mawk_state_t *MAWK, mawk_vio_t *vf); + +/* the application wants to signal eof to the script */ +int mawk_vio_hash_eof_from_app(mawk_state_t *MAWK, mawk_vio_t *vf); + diff --git a/src/example_apps/Makefile b/src/example_apps/Makefile new file mode 100644 index 0000000..38e363e --- /dev/null +++ b/src/example_apps/Makefile @@ -0,0 +1,29 @@ +APPS= \ + 10_run \ + 12_input \ + 12_multi \ + 15_call \ + 15_get_array \ + 15_get_scalar \ + 20_init_wired \ + 20_uninit_staged \ + 25_set_array \ + 30_out_pipes \ + 
31_out_pipes_manual \ + 50_runlimit \ + 51_blocking_fifo \ + 70_c_func \ + 71_c_func_call \ + 90_custom_array \ + 90_custom_vio \ + 90_fn_validation \ + 91_custom_vio_re + +all: + for n in $(APPS); do (cd $$n && $(MAKE) all) || exit 1; done + +clean: + for n in $(APPS); do (cd $$n && $(MAKE) clean) || exit 1; done + +test: + rv=0; for n in $(APPS); do (cd $$n && $(MAKE) -s test) || rv=1; done; exit $$rv diff --git a/src/example_apps/Makefile.common b/src/example_apps/Makefile.common new file mode 100644 index 0000000..93a0055 --- /dev/null +++ b/src/example_apps/Makefile.common @@ -0,0 +1,9 @@ +$(ROOT)/libmawk/libmawk.a: FORCE + cd $(ROOT)/libmawk && $(MAKE) libmawk.a + +test: + @$(MAKE) -s run > out.curr + @diff -u out.ref out.curr && rm out.curr + @awk -v "name=`basename $(PWD)`" 'BEGIN { printf("%-20s QC PASS\n", name)}' + +FORCE: diff --git a/src/example_apps/README b/src/example_apps/README new file mode 100644 index 0000000..4e7ee19 --- /dev/null +++ b/src/example_apps/README @@ -0,0 +1,19 @@ +This directory hosts a set of minimalistic example applications and +example scripts to try them on. Each example shall demonstrate a single +feature, the reader is expected to be able to combine these features when +building a large application. + +Directories are prefixed with a number to keep examples in order of +complexity. Starting at lower numbers helps understanding the basics +before examples start to deal with the more complex features. Examples +prefixed with the same number are on the same level of complexity and +may depend on familiarity with lower numbered examples only. + +Each directory is a single host application (with main() implemented in +app.c, being the starting point) usually accompanied by an awk script +(test.awk). There's always a comment explaining the purpose and invocation +of the example on top of app.c. Furthermore "make run" will run the example +with the standard invocation; the expected output is in out.ref; "make test" +runs the app and compares the output to the reference. 
+ + diff --git a/src/libmawk/ACKNOWLEDGMENT.mawk b/src/libmawk/ACKNOWLEDGMENT.mawk new file mode 100644 index 0000000..e01329b --- /dev/null +++ b/src/libmawk/ACKNOWLEDGMENT.mawk @@ -0,0 +1,47 @@ +Version 1.2 +=========== + +Thanks for help with beta test to Bill Davidsen, Tom Dickey, Ed +Ferguson, Jack Fitts, Onno van der Linden, Carl Mascott, Jean-Pierre +Radley, John Roll, Ian Searle, Bob Stockler. + +The calendar program examples/hical was written by Bob Stockler. + +Darrel Hankerson ported versions 1.2.x to DOS/OS2. + +Version 1.0 and 1.1 +=================== + +Carl Mascott ported mawk to V7 and in the process rooted out +some subtle (and not so subtle) bugs. + +Ian Searle ported mawk to System V and put up with my insane +attempts to get fpe exception trapping off. + +An anonymous reviewer for comp.sources.reviewed did the +MSC and Mac ports and wrote .bat files for the tests. +Another or maybe the same reviewer did the Dynix port. + +Ports to new systems: + Ed Ferguson MIPS M2000 C2.20 OS4.52 + Jwahar R. Bammi Atari ST + Berry Kercheval SGI IRIX 4.0.1 + Andy Newman Next 2.1 + Mike Carlton Next 2.1 + Elliot Jaffe AIX 3.1 + Jeremy Martin Convex 9.1 + Scott Hunziker Coherent 4.0 + Ken Poulton Hpux + Onno van der Linden 386bsd 0.1 + Bob Hutchinson Linux 0.98p14 + +The DOS version is a lot better thanks to suggestions and testing +from Ed Ferguson, Jack Fitts, Nadav Horesh, Michael Golan and +Conny Ohstrom. The DOS additions for 1.1.2d are all ideas of +Ben Myers; much of the code is his too. + +Arnold Robbins kept me current on POSIX standards for AWK, and +explained some of the "dark corners". + +Thank you to everyone who reported bugs or offered encouragement, +suggestions or criticism. (At least the bugs got fixed). 
diff --git a/src/libmawk/CHANGES.mawk b/src/libmawk/CHANGES.mawk new file mode 100644 index 0000000..8497492 --- /dev/null +++ b/src/libmawk/CHANGES.mawk @@ -0,0 +1,42 @@ +1.3.1 -> 1.3.2 Sep 1996 + +1) Numeric but not integer indices caused core dump in new array scheme. + Fixed bug and fired test division. + +2) Added ferror() checks on writes. + +3) Added some static storage specs to array.c to keep non-ansi + compilers happy. + +1.3 -> 1.3.1 Sep 1996 +Release to new ftp site ftp://ftp.whidbey.net. + +1) Workaround for overflow exception in strtod, sunos5.5 solaris. + +2) []...] and [^]...] put ] in a class (or not in a class) without + having to use back-slash escape. + +1.2.2 -> 1.3 Jul 1996 +Extensive redesign of array data structures to support large arrays and +fast access to arrays created with split. Many of the ideas in the +new design were inspired by reading "The Design and Implementation of +Dynamic Hashing Sets and Tables in Icon" by William Griswold and +Gregg Townsend, SPE 23,351-367. + +1.2.1 -> 1.2.2 Jan 1996 + +1) Improved autoconfig, in particular, fpe tests. This is far from + perfect and never will be until C standardizes an interface to ieee754. + +2) Removed automatic error message on open failure for getline. + +3) Flush all output before system(). Previous behavior was to only + flush std{out,err}. + +4) Explicitly fclose() all output on exit to work around AIX4.1 bug. + +5) Fixed random number generator to work with longs larger than + 32bits. + +6) Added a type Int which is int on real machines and long on dos machines. + Believe that all implicit assumptions that int=32bits are now gone. diff --git a/src/libmawk/COPYING b/src/libmawk/COPYING new file mode 100644 index 0000000..a43ea21 --- /dev/null +++ b/src/libmawk/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. 
+ 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. 
+ + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/src/libmawk/INSTALL b/src/libmawk/INSTALL new file mode 100644 index 0000000..f114627 --- /dev/null +++ b/src/libmawk/INSTALL @@ -0,0 +1,47 @@ +Look at the file config.user and edit to set user defines. + +if your system is one of + apollo + convex + mips + sgi + ultrix-mips + cray + hpux (read below) + unixware (read below) + +and you don't have gcc or prefer to use cc, then you may want to +copy config-user/your_system to config.user and edit that. + +run + + configure + make + + +If you have problems, please report it. If you can fix the problem, by +changing config.user, please send the results. Else send output from +configure, make and config.h. Send to brennan@whidbey.com. + + + +DOS: +Look at the file msdos/INSTALL + + +HPUX: +Evidently there is more than one compiler and/or math library. Some +configurations work out of the box (configure/make). Others need +CFLAGS='+O2 +FPZO'. On HPUX 9.05 with the ansi compiler HP92453-01 +A.09.77 set CFLAGS='-Ae +O2 +FPZO'. Thanks to Dr. Rafael R. +Pappalardo for this info. 
+ + + +UNIXWARE: +On some but not all versions, configure might decide you don't have +memcpy. Remove #define NO_MEMCPY 1 from config.h. +If the fpe_test check fails, change the definition of TURN_ON_FPE_TRAPS +to + +#define TURN_ON_FPE_TRAPS() fpsetmask(fpgetmask()|FP_X_DZ|FP_X_OFL|FP_X_INV) diff --git a/src/libmawk/Makefile b/src/libmawk/Makefile new file mode 100644 index 0000000..557ecec --- /dev/null +++ b/src/libmawk/Makefile @@ -0,0 +1,184 @@ +# select alternative zmalloc schemes +# commented out: use the slab-like original zmalloc (default) +# _native: use malloc()/free() - useful for memory leak hunting +# _safemalloc: use auto-cleanup malloc - useful if malloc() is preferred over slabs but the context should be cleaned up +#ZMALLOC_FLAVOR = _native + +# install these headers in $(INCDIR)/libmawk/ +IHEADERS=libmawk.h mawk.h init.h code.h files.h memory.h zmalloc.h \ + symtype.h nstd.h types.h array.h sizes.h num.h num_int.h \ + num_double.h conf.h bi_vars.h vio.h cell.h repl.h array_orig.h \ + array_environ.h array_generic.h vio_orig.h vio_fifo.h zfifo.h \ + execute.h + +MAWKMAN = $(MANDIR)/lmawk.$(MANEXT) + +VIO_OBJ = files_children.o vio_orig.o + +# common objects +CO=memory.o hash.o code.o vars.o da_bin.o da_common.o da_bin_helper.o error.o \ + bi_vars.o bi_funct_common.o array.o array_orig.o array_generic.o \ + field_common.o re_cmpl.o zmalloc$(ZMALLOC_FLAVOR).o fin_common.o files.o matherr.o fcall.o \ + version.o missing.o math_wrap.o cast.o cell.o scancode.o str.o \ + array_environ.o \ + $(VIO_OBJ) $(NUM_OBJ) + +# parser objects +PO=parse.o scan.o da_text.o code_dump.o kw.o jmp.o + +# execute objects +EO=execute.o bi_funct.o print.o debug.o field_exec.o split.o + +REXP_O=rexp/rexp.o rexp/rexp0.o rexp/rexp1.o rexp/rexp2.o\ + rexp/rexp3.o + +REXP_C=rexp/rexp.c rexp/rexp0.c rexp/rexp1.c rexp/rexp2.c\ + rexp/rexp3.c + +# objects required for the buffer-vio +O_VIO_BUF=zfifo.o vio_fifo.o + +# lib object +LO_ALL=$(CO) $(EO) $(PO) init.o libmawk.o fin_exec.o 
$(REXP_O) $(O_VIO_BUF) + +# original all-in-one lmawk executable objects +O = main.o $(CO) $(EO) $(PO) init.o fin_exec.o viohack.o $(REXP_O) + +# split shared object +O_COMP = bi_funct_dummy.o init.o print_dummy.o fin_comp.o $(PO) $(REXP_O) +O_EXEC = init_nocomp.o fin_exec.o $(O_VIO_BUF) $(EO) $(REXP_O) + +# split executables +OBJ_COMP = main-comp.o viohack.o $(O_COMP) $(CO) +OBJ_EXEC = main-exec.o viohack.o $(O_EXEC) $(CO) + +all: mawk_and_test + +include Makefile.conf + +BINS = lmawk lmawk-comp lmawk-exec \ + libmawk.so libmawk.a \ + libmawk_common.so libmawk_comp.so libmawk_exec.so \ + regression/re_test/re_test + +mawk_and_test : $(BINS) + +libmawk.so: $(LO_ALL) + $(CC) -shared $(LDFLAGS_RDYNAMIC) $(LDFLAGS) $(LDFLAGS_SO) -o libmawk.so $(LO_ALL) $(MATHLIB) + +libmawk_comp.so: $(O_COMP) + $(CC) -shared $(LDFLAGS_RDYNAMIC) $(LDFLAGS) -o libmawk_comp.so $(O_COMP) $(MATHLIB) + +libmawk_exec.so: $(O_EXEC) + $(CC) -shared $(LDFLAGS_RDYNAMIC) $(LDFLAGS) -o libmawk_exec.so $(O_EXEC) $(MATHLIB) + +libmawk_common.so: $(CO) + $(CC) -shared $(LDFLAGS_RDYNAMIC) $(LDFLAGS) -o libmawk_common.so $(CO) $(MATHLIB) + +libmawk.a: $(LO_ALL) + ar rvu $@ $(LO_ALL) + ranlib $@ + +lmawk : $(O) + $(CC) $(CFLAGS) -o lmawk $(O) $(MATHLIB) + +lmawk-comp : $(OBJ_COMP) + $(CC) $(CFLAGS) -o lmawk-comp $(OBJ_COMP) $(MATHLIB) + +lmawk-exec : $(OBJ_EXEC) + $(CC) $(CFLAGS) -o lmawk-exec $(OBJ_EXEC) $(MATHLIB) + +main-comp.o: main.c + $(CC) -c $(CFLAGS) -DMAWK_NO_EXEC=1 main.c -o main-comp.o + +main-exec.o: main.c + $(CC) -c $(CFLAGS) -DMAWK_NO_COMP=1 main.c -o main-exec.o + + +scancode.c : makescan.c scan.h + $(CC) $(CFLAGS) -o makescan.exe makescan.c + rm -f scancode.c + ./makescan.exe > scancode.c + rm makescan.exe + + +# compile regression tests implemented in C +# these rules are not in Makefiles under regression/, so +# all the $(CC) and $(CFLAGS) work without any extra hassle +RE_TEST_O = regression/re_test/re_test.o rexp/rexp.o rexp/rexp0.o rexp/rexp1.o rexp/rexp2.o rexp/rexp3.o 
zmalloc$(ZMALLOC_FLAVOR).o memory.o +regression/re_test/re_test: $(RE_TEST_O) + $(CC) -o regression/re_test/re_test $(RE_TEST_O) + +regression/re_test/re_test.o: regression/re_test/re_test.c + $(CC) -c $(CFLAGS) regression/re_test/re_test.c -o regression/re_test/re_test.o + +### misc: installation, clean, etc. ### + +test: all + cd regression && $(MAKE) + +PWD=`pwd` + +install_ : lmawk libmawk.so + $(MKDIR) $(BINDIR) + $(MKDIR) $(MANDIR) + $(MKDIR) $(INCDIR) + $(MKDIR) $(LIBDIR) + $(CP) $(PWD)/lmawk $(BINDIR) + $(CHMODX) $(BINDIR)/lmawk + $(CP) $(PWD)/man/lmawk.1 $(MAWKMAN) + $(CHMOD) 0644 $(MAWKMAN) + $(CP) $(PWD)/libmawk.so $(LIBDIR)/libmawk.so.$(SOVER1).$(SOVER2).$(SOVER3) + rm $(LIBDIR)/libmawk.so.$(SOVER1).$(SOVER2) $(LIBDIR)/libmawk.so.$(SOVER1) $(LIBDIR)/libmawk.so 2>/dev/null ; true + ln -s libmawk.so.$(SOVER1).$(SOVER2).$(SOVER3) $(LIBDIR)/libmawk.so.$(SOVER1).$(SOVER2) + ln -s libmawk.so.$(SOVER1).$(SOVER2) $(LIBDIR)/libmawk.so.$(SOVER1) + ln -s libmawk.so.$(SOVER1) $(LIBDIR)/libmawk.so + for h in $(IHEADERS); do $(CP) $(PWD)/$$h $(INCDIR)/$$h; done + $(CP) $(PWD)/../libmawk.h $(INCDIR)/../libmawk.h + +install: + $(MAKE) install_ CP="cp" + +linstall: + $(MAKE) install_ CP="ln -s" + + +rexp/rexp.o: rexp/rexp.c + $(CC) -c $(CFLAGS) -o rexp/rexp.o rexp/rexp.c + +rexp/rexp0.o: rexp/rexp0.c + $(CC) -c $(CFLAGS) -o rexp/rexp0.o rexp/rexp0.c + +rexp/rexp1.o: rexp/rexp1.c + $(CC) -c $(CFLAGS) -o rexp/rexp1.o rexp/rexp1.c + +rexp/rexp2.o: rexp/rexp2.c + $(CC) -c $(CFLAGS) -o rexp/rexp2.o rexp/rexp2.c + +rexp/rexp3.o: rexp/rexp3.c + $(CC) -c $(CFLAGS) -o rexp/rexp3.o rexp/rexp3.c + +$(NUM_OBJ): $(NUM_SRC) num.h + +# NOTE: linstall must not depend on install: files copied first would make every "ln -s" fail with "File exists" + +init_nocomp.o: init_nocomp.c + $(CC) -c $(CFLAGS) -o init_nocomp.o init_nocomp.c + +init_nocomp.o: init.c + + +clean : + rm -f *.o rexp/*.o $(BINS) + cd regression && $(MAKE) clean + +distclean : clean + rm -f defines.out maxint.out conf.h Makefile.conf + +include Makefile.dep + +depend: + @echo "# Generated by \"make depend\"" > Makefile.dep + gcc $(CFLAGS) -MM 
`echo $(LO_ALL) | sed "s/\.o/.c/g"` >> Makefile.dep + +FORCE: diff --git a/src/libmawk/Makefile.conf.in b/src/libmawk/Makefile.conf.in new file mode 100644 index 0000000..a823aaa --- /dev/null +++ b/src/libmawk/Makefile.conf.in @@ -0,0 +1,91 @@ +switch cc/soname + case {} + put /local/soname {} + end + default + put /local/soname [@@cc/soname@libmawk.so.@/local/version/1@@] + end +end + +print [@ +# version of the .so file is SOVER1.SOVER2.SOVER3 +SOVER1=@/local/version/1@ +SOVER2=@/local/version/2@ +SOVER3=@/local/version/3@ + +PREFIX=@/local/prefix@ + +#--------------------- hand tweak values --------------------------------- +# TODO: scconfig +SHELL=/bin/sh + +# where to put mawk +BINDIR = $(DESTDIR)$(install_root)$(PREFIX)/bin +# where to put libraries +LIBDIR = $(DESTDIR)$(install_root)$(PREFIX)/lib +# where to put include files +INCDIR = $(DESTDIR)$(install_root)$(PREFIX)/include/libmawk +# where to put the man pages +MANDIR = $(DESTDIR)$(install_root)$(PREFIX)/share/man/man1 +# where to put the doc +DOCDIR = $(DESTDIR)$(install_root)$(PREFIX)/share/doc/libmawk +MANEXT = 1 + + +#----------------- scconfig detected ---------------------------------- +CC = @cc/cc@ +CFLAGS = @cc/cflags@ @cc/fpic@ -I.. -I. 
$(CFLAGS_APP) +LDFLAGS = @cc/ldflags@ +LDFLAGS_SO = @/local/soname@ +LDFLAGS_RDYNAMIC = @cc/rdynamic@ + +CHMODX = @fstools/chmodx@ +CHMOD = chmod +MKDIR = @fstools/mkdir@ +CP=@fstools/cp@ +SYMLINK=@fstools/ln@ +@] + +switch /local/numeric + +case {double} print [@ +# Numeric format is double (float enabled) +NUM_OBJ = num_double.o +NUM_SRC = num_double.c +MATHLIB = -lm +@] +end + +case {int} print [@ +# Numeric format is int (float disabled) +NUM_OBJ = num_int.o +NUM_SRC = num_int.c +MATHLIB = +@] +end +end + +print [@ +#--- parser generator --- +@] + +if ?parsgen/bison/presents then +print [^ +# bison found, can generate parse.c +YACC = ^parsgen/bison^ -y --name-prefix=Mawk_ + +parse.c : parse.y + @echo expect 4 shift/reduce conflicts + $(YACC) -d parse.y + mv y.tab.c parse.c + -if cmp -s y.tab.h parse.h ;\ + then rm y.tab.h ;\ + else mv y.tab.h parse.h ; fi +^] +else +print [^ +parse.c: parse.y + @echo WARNING: Can not generate parse.c from parse.y because bison was not found. +^] +end + diff --git a/src/libmawk/Makefile.dep b/src/libmawk/Makefile.dep new file mode 100644 index 0000000..9ffe44a --- /dev/null +++ b/src/libmawk/Makefile.dep @@ -0,0 +1,312 @@ +# Generated by "make depend" +memory.o: memory.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h memory.h ../libmawk/zmalloc.h +hash.o: hash.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h memory.h ../libmawk/zmalloc.h \ + symtype.h ../libmawk/array.h ../libmawk/cell.h ../libmawk/repl.h \ + ../libmawk/array_orig.h cell.h repl.h +code.o: code.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h 
../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h code.h ../libmawk/memory.h \ + ../libmawk/zmalloc.h init.h ../libmawk/symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h jmp.h \ + field.h +vars.o: vars.c vars.h mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h zmalloc.h \ + cell.h repl.h +da_bin.o: da_bin.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h code.h ../libmawk/memory.h \ + ../libmawk/zmalloc.h bi_funct.h ../libmawk/symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h repl.h \ + field.h num.h fin.h vars.h f2d.h +da_common.o: da_common.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h code.h ../libmawk/memory.h \ + ../libmawk/zmalloc.h bi_funct.h ../libmawk/symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h repl.h \ + field.h num.h +da_bin_helper.o: da_bin_helper.c da_bin.h mawk.h ../libmawk/conf.h \ + ../libmawk/nstd.h ../libmawk/conf.h ../libmawk/types.h \ + ../libmawk/sizes.h ../libmawk/num.h ../libmawk/sizes.h \ + ../libmawk/num_double.h ../libmawk/mawk.h ../libmawk/bi_vars.h \ + ../libmawk/vio.h +error.o: error.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + 
../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h scan.h ../libmawk/symtype.h \ + ../libmawk/array.h ../libmawk/cell.h ../libmawk/repl.h \ + ../libmawk/array_orig.h ../libmawk/parse.h bi_vars.h vargs.h zmalloc.h \ + memory.h ../libmawk/zmalloc.h cell.h repl.h +bi_vars.o: bi_vars.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h bi_vars.h \ + field.h init.h ../libmawk/symtype.h memory.h ../libmawk/zmalloc.h num.h \ + cell.h repl.h array_environ.h +bi_funct_common.o: bi_funct_common.c mawk.h ../libmawk/conf.h \ + ../libmawk/nstd.h ../libmawk/conf.h ../libmawk/types.h \ + ../libmawk/sizes.h ../libmawk/num.h ../libmawk/sizes.h \ + ../libmawk/num_double.h ../libmawk/mawk.h ../libmawk/bi_vars.h \ + ../libmawk/vio.h bi_funct.h ../libmawk/symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h bi_vars.h +array.o: array.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h array.h cell.h repl.h array_orig.h \ + memory.h ../libmawk/zmalloc.h zmalloc.h +array_orig.o: array_orig.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h memory.h \ + ../libmawk/zmalloc.h zmalloc.h field.h bi_vars.h num.h array_orig.h \ + cell.h repl.h split.h +array_generic.o: array_generic.c mawk.h 
../libmawk/conf.h \ + ../libmawk/nstd.h ../libmawk/conf.h ../libmawk/types.h \ + ../libmawk/sizes.h ../libmawk/num.h ../libmawk/sizes.h \ + ../libmawk/num_double.h ../libmawk/mawk.h ../libmawk/bi_vars.h \ + ../libmawk/vio.h zmalloc.h memory.h ../libmawk/zmalloc.h split.h +field_common.o: field_common.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h field.h init.h \ + ../libmawk/symtype.h ../libmawk/array.h ../libmawk/cell.h \ + ../libmawk/repl.h ../libmawk/array_orig.h memory.h ../libmawk/zmalloc.h \ + scan.h ../libmawk/parse.h bi_vars.h repl.h regexp.h +re_cmpl.o: re_cmpl.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h memory.h ../libmawk/zmalloc.h \ + scan.h ../libmawk/symtype.h ../libmawk/array.h ../libmawk/cell.h \ + ../libmawk/repl.h ../libmawk/array_orig.h ../libmawk/parse.h regexp.h \ + repl.h +zmalloc.o: zmalloc.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h zmalloc.h memory.h \ + ../libmawk/zmalloc.h +fin_common.o: fin_common.c conf.h mawk.h ../libmawk/conf.h \ + ../libmawk/nstd.h ../libmawk/conf.h ../libmawk/types.h \ + ../libmawk/sizes.h ../libmawk/num.h ../libmawk/sizes.h \ + ../libmawk/num_double.h ../libmawk/mawk.h ../libmawk/bi_vars.h \ + ../libmawk/vio.h fin.h memory.h ../libmawk/zmalloc.h bi_vars.h field.h \ + symtype.h ../libmawk/array.h ../libmawk/cell.h ../libmawk/repl.h \ + ../libmawk/array_orig.h scan.h ../libmawk/symtype.h ../libmawk/parse.h \ + vio.h +files.o: files.c conf.h mawk.h ../libmawk/conf.h 
../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h files.h memory.h \ + ../libmawk/zmalloc.h fin.h vio.h +matherr.o: matherr.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h num.h +fcall.o: fcall.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h code.h \ + ../libmawk/memory.h ../libmawk/zmalloc.h +version.o: version.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h vio_orig.h +missing.o: missing.c nstd.h conf.h +math_wrap.o: math_wrap.c math_wrap.h num.h conf.h sizes.h \ + ../libmawk/num_double.h +cast.o: cast.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h field.h memory.h \ + ../libmawk/zmalloc.h scan.h ../libmawk/symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h \ + ../libmawk/parse.h repl.h num.h cell.h +cell.o: cell.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h repl.h cell.h +scancode.o: 
scancode.c +str.o: str.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h ../libmawk/conf.h \ + ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h scan.h ../libmawk/symtype.h \ + ../libmawk/array.h ../libmawk/cell.h ../libmawk/repl.h \ + ../libmawk/array_orig.h ../libmawk/parse.h +array_environ.o: array_environ.c mawk.h ../libmawk/conf.h \ + ../libmawk/nstd.h ../libmawk/conf.h ../libmawk/types.h \ + ../libmawk/sizes.h ../libmawk/num.h ../libmawk/sizes.h \ + ../libmawk/num_double.h ../libmawk/mawk.h ../libmawk/bi_vars.h \ + ../libmawk/vio.h symtype.h ../libmawk/array.h ../libmawk/cell.h \ + ../libmawk/repl.h ../libmawk/array_orig.h memory.h ../libmawk/zmalloc.h \ + field.h bi_vars.h num.h array_orig.h array_generic.h cell.h repl.h +files_children.o: files_children.c conf.h mawk.h ../libmawk/conf.h \ + ../libmawk/nstd.h ../libmawk/conf.h ../libmawk/types.h \ + ../libmawk/sizes.h ../libmawk/num.h ../libmawk/sizes.h \ + ../libmawk/num_double.h ../libmawk/mawk.h ../libmawk/bi_vars.h \ + ../libmawk/vio.h files.h memory.h ../libmawk/zmalloc.h fin.h +vio_orig.o: vio_orig.c conf.h mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h files.h memory.h \ + ../libmawk/zmalloc.h fin.h field.h vio.h array_environ.h +num_double.o: num_double.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h num.h math_wrap.h +execute.o: execute.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + 
../libmawk/bi_vars.h ../libmawk/vio.h code.h ../libmawk/memory.h \ + ../libmawk/zmalloc.h memory.h symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h field.h \ + bi_funct.h ../libmawk/symtype.h bi_vars.h regexp.h repl.h fin.h debug.h \ + num.h math_wrap.h cell.h execute.h f2d.h execute_debug.h +bi_funct.o: bi_funct.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h bi_funct.h ../libmawk/symtype.h \ + ../libmawk/array.h ../libmawk/cell.h ../libmawk/repl.h \ + ../libmawk/array_orig.h bi_vars.h memory.h ../libmawk/zmalloc.h init.h \ + files.h fin.h field.h regexp.h repl.h num.h math_wrap.h vio.h cell.h +print.o: print.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h bi_vars.h bi_funct.h \ + ../libmawk/symtype.h ../libmawk/array.h ../libmawk/cell.h \ + ../libmawk/repl.h ../libmawk/array_orig.h memory.h ../libmawk/zmalloc.h \ + field.h scan.h ../libmawk/parse.h files.h vio.h cell.h repl.h +debug.o: debug.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h +field_exec.o: field_exec.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h field.h init.h \ + ../libmawk/symtype.h ../libmawk/array.h ../libmawk/cell.h \ + ../libmawk/repl.h ../libmawk/array_orig.h memory.h ../libmawk/zmalloc.h \ + scan.h ../libmawk/parse.h bi_vars.h repl.h 
regexp.h cell.h +split.o: split.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h bi_vars.h \ + bi_funct.h ../libmawk/symtype.h memory.h ../libmawk/zmalloc.h scan.h \ + ../libmawk/parse.h regexp.h field.h +parse.o: parse.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h types.h symtype.h \ + ../libmawk/array.h ../libmawk/cell.h ../libmawk/repl.h \ + ../libmawk/array_orig.h code.h ../libmawk/memory.h ../libmawk/zmalloc.h \ + memory.h bi_funct.h ../libmawk/symtype.h bi_vars.h jmp.h field.h files.h \ + scan.h ../libmawk/parse.h zmalloc.h f2d.h +scan.o: scan.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h scan.h ../libmawk/symtype.h \ + ../libmawk/array.h ../libmawk/cell.h ../libmawk/repl.h \ + ../libmawk/array_orig.h ../libmawk/parse.h memory.h ../libmawk/zmalloc.h \ + field.h init.h fin.h repl.h code.h ../libmawk/memory.h bi_vars.h vio.h \ + da_bin_helper.h files.h +da_text.o: da_text.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h code.h ../libmawk/memory.h \ + ../libmawk/zmalloc.h bi_funct.h ../libmawk/symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h repl.h \ + field.h num.h f2d.h +code_dump.o: code_dump.c mawk.h 
../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h code.h ../libmawk/memory.h \ + ../libmawk/zmalloc.h init.h ../libmawk/symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h jmp.h \ + field.h da_bin_helper.h +kw.o: kw.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h ../libmawk/conf.h \ + ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h parse.h \ + init.h ../libmawk/symtype.h +jmp.o: jmp.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h ../libmawk/conf.h \ + ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h jmp.h code.h \ + ../libmawk/memory.h ../libmawk/zmalloc.h sizes.h init.h \ + ../libmawk/symtype.h memory.h +init.o: init.c conf.h mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h code.h ../libmawk/memory.h \ + ../libmawk/zmalloc.h memory.h symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h init.h \ + ../libmawk/symtype.h scan.h ../libmawk/parse.h bi_vars.h field.h \ + zmalloc.h vio.h version.h da_bin_helper.h cell.h repl.h files.h +libmawk.o: libmawk.c libmawk.h ../libmawk/mawk.h ../libmawk/conf.h \ + ../libmawk/nstd.h ../libmawk/conf.h ../libmawk/types.h \ + ../libmawk/sizes.h ../libmawk/num.h ../libmawk/sizes.h \ + ../libmawk/num_double.h 
../libmawk/mawk.h ../libmawk/bi_vars.h \ + ../libmawk/vio.h ../libmawk/init.h ../libmawk/symtype.h \ + ../libmawk/array.h ../libmawk/cell.h ../libmawk/repl.h \ + ../libmawk/array_orig.h ../libmawk/code.h ../libmawk/memory.h \ + ../libmawk/zmalloc.h ../libmawk/files.h ../libmawk/array_generic.h \ + ../libmawk/vio_orig.h ../libmawk/vio_fifo.h ../libmawk/vio.h \ + ../libmawk/zfifo.h ../libmawk/execute.h debug.h mawk.h memory.h sizes.h \ + array.h fin.h num.h vio.h vars.h vio_fifo.h vio_orig.h +fin_exec.o: fin_exec.c conf.h mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h fin.h memory.h \ + ../libmawk/zmalloc.h bi_vars.h field.h symtype.h ../libmawk/array.h \ + ../libmawk/cell.h ../libmawk/repl.h ../libmawk/array_orig.h scan.h \ + ../libmawk/symtype.h ../libmawk/parse.h vio.h init.h vars.h cell.h \ + repl.h files.h +rexp.o: rexp/rexp.c rexp/rexp.h ../libmawk/nstd.h ../libmawk/conf.h \ + ../libmawk/mawk.h ../libmawk/conf.h ../libmawk/types.h \ + ../libmawk/sizes.h ../libmawk/num.h ../libmawk/sizes.h \ + ../libmawk/num_double.h ../libmawk/mawk.h ../libmawk/bi_vars.h \ + ../libmawk/vio.h +rexp0.o: rexp/rexp0.c rexp/rexp.h ../libmawk/nstd.h ../libmawk/conf.h \ + ../libmawk/mawk.h ../libmawk/conf.h ../libmawk/types.h \ + ../libmawk/sizes.h ../libmawk/num.h ../libmawk/sizes.h \ + ../libmawk/num_double.h ../libmawk/mawk.h ../libmawk/bi_vars.h \ + ../libmawk/vio.h ../libmawk/zmalloc.h +rexp1.o: rexp/rexp1.c rexp/rexp.h ../libmawk/nstd.h ../libmawk/conf.h \ + ../libmawk/mawk.h ../libmawk/conf.h ../libmawk/types.h \ + ../libmawk/sizes.h ../libmawk/num.h ../libmawk/sizes.h \ + ../libmawk/num_double.h ../libmawk/mawk.h ../libmawk/bi_vars.h \ + ../libmawk/vio.h ../libmawk/zmalloc.h +rexp2.o: rexp/rexp2.c rexp/rexp.h ../libmawk/nstd.h ../libmawk/conf.h \ + ../libmawk/mawk.h ../libmawk/conf.h 
../libmawk/types.h \ + ../libmawk/sizes.h ../libmawk/num.h ../libmawk/sizes.h \ + ../libmawk/num_double.h ../libmawk/mawk.h ../libmawk/bi_vars.h \ + ../libmawk/vio.h +rexp3.o: rexp/rexp3.c rexp/rexp.h ../libmawk/nstd.h ../libmawk/conf.h \ + ../libmawk/mawk.h ../libmawk/conf.h ../libmawk/types.h \ + ../libmawk/sizes.h ../libmawk/num.h ../libmawk/sizes.h \ + ../libmawk/num_double.h ../libmawk/mawk.h ../libmawk/bi_vars.h \ + ../libmawk/vio.h +zfifo.o: zfifo.c mawk.h ../libmawk/conf.h ../libmawk/nstd.h \ + ../libmawk/conf.h ../libmawk/types.h ../libmawk/sizes.h ../libmawk/num.h \ + ../libmawk/sizes.h ../libmawk/num_double.h ../libmawk/mawk.h \ + ../libmawk/bi_vars.h ../libmawk/vio.h zfifo.h zmalloc.h +vio_fifo.o: vio_fifo.c vio_fifo.h mawk.h ../libmawk/conf.h \ + ../libmawk/nstd.h ../libmawk/conf.h ../libmawk/types.h \ + ../libmawk/sizes.h ../libmawk/num.h ../libmawk/sizes.h \ + ../libmawk/num_double.h ../libmawk/mawk.h ../libmawk/bi_vars.h \ + ../libmawk/vio.h vio.h zfifo.h memory.h ../libmawk/zmalloc.h diff --git a/src/libmawk/README.mawk b/src/libmawk/README.mawk new file mode 100644 index 0000000..8e835f3 --- /dev/null +++ b/src/libmawk/README.mawk @@ -0,0 +1,92 @@ +Mawk -- an implementation of new/posix awk +version 1.3.2 + +Installation instructions in file INSTALL. + +Bug reports, comments, questions, etc. to +Mike Brennan, brennan@whidbey.com. +ftp site: ftp.whidbey.net in ~/pub/brennan + +Version 1.3 implements a new internal design for arrays. See file +CHANGES. + +Version 1.2.2 is best for MsDOS +--------------------------------------------------------- + +Changes from version 1.1.4 to 1.2: + +1) Limit on code size set by #define in sizes.h is gone. + +2) A number of obscure bugs have been fixed such as, + you can now make a recursive function call inside a for( i in A) loop. + Function calls with array parameters in loop expressions sometimes + generated erroneous internal code. + + See RCS log comments in code for details. + + Reported bugs are fixed. 
+ +3) new -W options + + + -We file : reads commands from file and next argument, regardless + of form, is ARGV[1]. Useful for passing -v , -f etc to + an awk program started with #!/.../mawk + + + #!/usr/local/bin/mawk -We + + myprogram -v works, while + + #!/usr/local/bin/mawk -f + + myprogram -v gives error message + mawk: option -v lacks argument + + This is really a posix bozo. Posix says you end arguments with + -- , but this doesn't work with the #! convention. + + + + -W interactive : forces stdout to be unbuffered and stdin to + be line buffered. Records from stdin are lines regardless of + the value of RS. Useful for interaction with a mawk on a pipe. + + -W dump, -Wd : disassembles internal code to stdout (used to be + stderr) and exits 0. + +4) FS = "" causes each record to be broken into characters and placed + into $1,$2 ... + + same with split(x,A,"") and split(x,A,//) + + +5) print > "/dev/stdout" writes to stdout, exactly the same as + print + + This is useful for passing stdout to + + function my_special_output_routine(s, file) + { + # do something fancy with s + print s > file + } + + +6) New built-in function fflush() -- copied from the latest att awk. + + fflush() : flushes stdout and returns 0 + fflush(file) flushes file and returns 0; if file was not an + open output file then returns -1. + +7) delete A ; -- removes all elements of the array A + + intended to replace: + + for( i in A) delete A[i] + +8) mawk errors such as compilation failure, file open failure, etc. + now exit 2 which reserves exit 1 for the user. + +9) No program now silently exits 0, prior behavior was to exit 2 with + an error message diff --git a/src/libmawk/array.c b/src/libmawk/array.c new file mode 100644 index 0000000..a09aa3b --- /dev/null +++ b/src/libmawk/array.c @@ -0,0 +1,99 @@ +/* +libmawk (C) 2014, Tibor 'Igor2' Palinkas; + +libMawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991.
+*/ +#include +#include +#include "mawk.h" +#include "array.h" +#include "memory.h" +#include "zmalloc.h" +#include "cell.h" + +/* -- generic array implementation -- */ + +/* - creating a new array does not depend on the implementation - */ +mawk_array_t mawk_array_new(mawk_state_t *MAWK, const array_imp_t *imp) +{ + mawk_array_t a; + a = MAWK_ZMALLOC(MAWK, struct array); + memset(a, 0, sizeof(struct array)); + if (imp == NULL) + a->imp = mawk_array_orig_imp; + else + a->imp = *imp; + return a; +} + +void mawk_array_destroy(mawk_state_t *MAWK, mawk_array_t ARR) +{ + mawk_array_clear(MAWK, ARR); + mawk_zfree(MAWK, ARR, sizeof(struct array)); +} + +/* catting the indices together doesn't depend on implementation */ +mawk_cell_t *mawk_array_cat(mawk_state_t *MAWK, mawk_cell_t *sp, int cnt) +{ + mawk_cell_t *p; /* walks the eval stack */ + mawk_cell_t subsep; /* local copy of SUBSEP */ + unsigned subsep_len; /* string length of subsep_str */ + char *subsep_str; + + unsigned total_len; /* length of cat'ed expression */ + mawk_cell_t *top; /* value of sp at entry */ + char *target; /* build cat'ed char* here */ + mawk_string_t *sval; /* build cat'ed mawk_string_t here */ + mawk_cellcpy(MAWK, &subsep, SUBSEP); + if (subsep.type < C_STRING) + mawk_cast1_to_str(MAWK, &subsep); + subsep_len = string(&subsep)->len; + subsep_str = string(&subsep)->str; + + top = sp; + sp -= (cnt - 1); + + total_len = (cnt - 1) * subsep_len; + for (p = sp; p <= top; p++) { + if (p->type < C_STRING) + mawk_cast1_to_str(MAWK, p); + total_len += string(p)->len; + } + + sval = mawk_new_STRING0(MAWK, total_len); + target = sval->str; + for (p = sp; p < top; p++) { + memcpy(target, string(p)->str, string(p)->len); + target += string(p)->len; + memcpy(target, subsep_str, subsep_len); + target += subsep_len; + } + /* now p == top */ + memcpy(target, string(p)->str, string(p)->len); + + for (p = sp; p <= top; p++) + free_STRING(string(p)); + free_STRING(string(&subsep)); + /* set contents of sp , 
sp->type > C_STRING is possible so reset */ + sp->type = C_STRING; + sp->ptr = (PTR) sval; + return sp; +} + +void mawk_array_clear_common(mawk_state_t *MAWK, mawk_array_t A) +{ + A->ptr = NULL; + A->size = 0; + A->limit = 0; + A->hmask = 0; + A->type = 0; + +} + +/* - naive implementation for the complex queries - + these assume the most basic operations (set/get/find/delet) to be implemented + and provide a slow, but generic implementation for the higher level + calls */ + +/* TODO */ diff --git a/src/libmawk/array.h b/src/libmawk/array.h new file mode 100644 index 0000000..8bd18f6 --- /dev/null +++ b/src/libmawk/array.h @@ -0,0 +1,166 @@ +/* +array.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991-96, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +*/ + +/* +This file originally had been generated with the command + + notangle -R'"array.h"' array.w > array.h + +but is maintained as .h in libmawk. + +(Notangle is part of Norman Ramsey's noweb literate programming package +available from CTAN(ftp.shsu.edu)). +*/ + +#ifndef ARRAY_H +#define ARRAY_H 1 + +#define AY_NULL 0 +#define AY_INT 1 +#define AY_STR 2 +#define AY_SPLIT 4 + +#define NO_MAWK_CREATE 0 +#define MAWK_CREATE 1 + +#include "cell.h" +#include "array_orig.h" + +/* create a new, empty array, copying implementation from imp; if imp is NULL, + use the default (original) implementation. 
*/ +mawk_array_t mawk_array_new(mawk_state_t *MAWK, const array_imp_t *imp); + +/* free all memory used by ARR */ +void mawk_array_destroy(mawk_state_t *MAWK, mawk_array_t ARR); + +/* concat multiple expressions separated by SUBSEP to be an array index */ +mawk_cell_t *mawk_array_cat(mawk_state_t *MAWK, mawk_cell_t *sp, int cnt); + +/* clear the common administrative part of the array, leaving imp alone; + this must be used instead of memsetting the mawk_array_t */ +void mawk_array_clear_common(mawk_state_t *MAWK, mawk_array_t A); + + +/* wrappers for accessing the implementations; a layer of macros is introduced + here to guarantee future flexibility */ +/* **** WARNING: these macros will evaluate ARR_ at least twice! **** */ +#define mawk_array_find(MAWK_, ARR_, idx_, result_, create_) (ARR_)->imp.find((MAWK_), (ARR_), (idx_), (result_), (create_)) +#define mawk_array_set(MAWK_, ARR_, idx_, val_) (ARR_)->imp.set((MAWK_), (ARR_), (idx_), (val_)) +#define mawk_array_delete(MAWK_, ARR_, cell_) (ARR_)->imp.delet((MAWK_), (ARR_), (cell_)) +#define mawk_array_clear(MAWK_, ARR_) (ARR_)->imp.clear((MAWK_), (ARR_)) +#define mawk_array_loop_vector(MAWK_, ARR_, sizep_) (ARR_)->imp.loop_vect((MAWK_), (ARR_), (sizep_)) +#define mawk_array_load(MAWK_, ARR_, cnt_) (ARR_)->imp.load((MAWK_), (ARR_), (cnt_)) + + + + + +/* The following macros are used in execute.c for fast and convenient access + to cells on SP */ + +/* check whether an array is pure (no side effects, aka orig implementation) for + read or read+write; each expansion is fully parenthesized so it is safe next to !, && or ?: */ +#define mawk_array_pure4wr(MAWK, ARR_) ((ARR_)->imp.set == mawk_array_orig_imp.set) +#define mawk_array_pure4del(MAWK, ARR_) ((ARR_)->imp.delet == mawk_array_orig_imp.delet) +#define mawk_array_pure4rd(MAWK, ARR_) ((ARR_)->imp.find == mawk_array_orig_imp.find) +#define mawk_array_pure4rdwr(MAWK, ARR_) ((mawk_array_pure4wr((MAWK), (ARR_))) && (mawk_array_pure4del((MAWK), (ARR_))) && (mawk_array_pure4rd((MAWK), (ARR_)))) + +#define mawk_array_pure(MAWK, ARR_REF_,
for_write) ((for_write) ? (mawk_array_pure4rdwr((MAWK), (mawk_array_t)(ARR_REF_)->ptr)) : (mawk_array_pure4rd((MAWK), (mawk_array_t)(ARR_REF_)->ptr))) + +/* change the value of an array pointed to by ARR_REF_ (type C_ARR_REF). + This wrapper is useful from execute.c where multiple assignments + to array elements will have to do the same: + - look up the index in the array + - modify the content of the array element + - destroy the index and the rvalue + - put the value of the expr in res_ + + Note: mawk_cell_destroy(MAWK, (mawk_cell_t *)(ARR_REF_)) is not required: destroy of C_ARR_REF is a nop +*/ +#define mawk_array_set_execute_(MAWK, res_, ARR_REF_, IDX_, rvalue_) \ +do { \ + mawk_array_set(MAWK, (mawk_array_t)((ARR_REF_)->ptr), (IDX_), rvalue_); \ + mawk_cell_destroy(MAWK, (mawk_cell_t *)(IDX_)); \ + if (IDX_ != res_) \ + mawk_cell_destroy(MAWK, res_); \ + mawk_cellcpy(MAWK, res_, rvalue_); \ + mawk_cell_destroy(MAWK, rvalue_); \ +} while(0) + +/* look up an array element; place the resulting cell in res_ + + Does not destroy IDX_.
+ + Note: mawk_cell_destroy(MAWK, (mawk_cell_t *)(ARR_REF_)) is not required: destroy of C_ARR_REF is a nop +*/ +#define mawk_array_get_execute_(MAWK, res_, ARR_REF_, IDX_) \ +do { \ + mawk_array_find(MAWK, (mawk_array_t)((ARR_REF_)->ptr), (IDX_), (res_), MAWK_CREATE); \ +} while(0) + + +/* this must not be used on arrays with side effects */ +mawk_cell_t *mawk_array_find_orig_(mawk_state_t *MAWK, mawk_array_t A, const mawk_cell_t *cp, int create_flag); +#define mawk_array_getptr_execute_(MAWK, res_, ARR_REF_, IDX_) \ +do { \ + res_ = mawk_array_find_orig_(MAWK, (mawk_array_t)((ARR_REF_)->ptr), (IDX_), MAWK_CREATE); \ +} while(0) + + +/* same as mawk_array_get_execute plus cast the result to number if it is not + a number already + does not destroy IDX + */ +#define mawk_array_getnum_execute_(MAWK, res_, ARR_REF_, IDX_) \ +do { \ + mawk_array_get_execute(MAWK, (res_), (ARR_REF_), (IDX_)); \ + if ((res_)->type != C_NUM) \ + mawk_cast1_to_num(MAWK, (res_)); \ +} while(0) + +#ifdef DEBUG + /* in debug mode use static inlines for easier debugging */ + static inline void mawk_array_get_execute(mawk_state_t *MAWK, mawk_cell_t *res, mawk_cell_t *arr_ref, mawk_cell_t *idx) + { + mawk_array_get_execute_(MAWK, res, arr_ref, idx); + } + + /* getptr stores into res_ itself, so it has to stay a macro even in debug + mode: an inline function would only assign to its own by-value copy of + the pointer and the caller would never see the looked-up cell */ + #define mawk_array_getptr_execute mawk_array_getptr_execute_ + + static inline void mawk_array_getnum_execute(mawk_state_t *MAWK, mawk_cell_t *res, mawk_cell_t *arr_ref, mawk_cell_t *idx) + { + mawk_array_getnum_execute_(MAWK, res, arr_ref, idx); + } + + static inline void mawk_array_set_execute(mawk_state_t *MAWK, mawk_cell_t *res, mawk_cell_t *arr_ref, mawk_cell_t *idx, mawk_cell_t *rvalue) + { + mawk_array_set_execute_(MAWK, res, arr_ref, idx, rvalue); + } +#else +#define mawk_array_getnum_execute mawk_array_getnum_execute_ +#define mawk_array_getptr_execute mawk_array_getptr_execute_ +#define mawk_array_get_execute
mawk_array_get_execute_ +#define mawk_array_set_execute mawk_array_set_execute_ +#endif + + + +#endif /* ARRAY_H */ + + + diff --git a/src/libmawk/array_environ.c b/src/libmawk/array_environ.c new file mode 100644 index 0000000..503b8ce --- /dev/null +++ b/src/libmawk/array_environ.c @@ -0,0 +1,233 @@ +/* +libmawk changes (C) 2009-2014, Tibor 'Igor2' Palinkas; + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +*/ + +/* + +ENVIRON[] implementation +~~~~~~~~~~~~~~~~~~~~~~~~ +Although POSIX doesn't require this, in libmawk ENVIRON[] should affect +any exec'd child process (getline and system()). Since there may be multiple +mawk_state_t instances, ENVIRON[] can not directly manipulate the global +char **environ. Instead each context has its own array. + +Unlike in the original code, the ENVIRON[] array is not initialized +before the first runtime reference to it. This saves some memory and +some time, but more importantly it makes libmawk able to run in +situations where there's no standard environment (e.g. the Linux kernel). 
+ +*/ + +#include +#include +#include "mawk.h" +#include "symtype.h" +#include "memory.h" +#include "field.h" +#include "bi_vars.h" +#include "num.h" +#include "array_orig.h" +#include "array_generic.h" +#include "cell.h" + +/* bits of array state */ +enum { + ST_LOADED = 1, + ST_MODIFIED = 2 +}; + +/* copy the process' environ to ENVIRON; entries without '=' are skipped */ +static void mawk_load_environ(mawk_state_t *MAWK, mawk_array_t ENV) +{ + mawk_cell_t ci, cv; + extern char **environ; + register char **p = environ; /* walks environ */ + char *s; /* looks for the '=' */ + + ci.type = C_STRING; + cv.type = C_MBSTRN; + + while (*p) { + if ((s = strchr(*p, '='))) { /* shouldn't fail */ + int len = s - *p; + ci.ptr = (PTR) mawk_new_STRING0(MAWK, len); + memcpy(string(&ci)->str, *p, len); + s++; + + cv.ptr = (PTR) mawk_new_STRING(MAWK, s); + mawk_array_orig_imp.set(MAWK, ENV, &ci, &cv); + free_STRING(string(&cv)); /* set() stored its own copy of the value; drop our reference just like for the index */ + free_STRING(string(&ci)); + } + p++; + } +} + +static void chkenv(mawk_state_t *MAWK, mawk_array_t A) +{ + if ((A->state.i & ST_LOADED) == 0) { + A->state.i |= ST_LOADED; + mawk_load_environ(MAWK, A); + } +} + + +int mawk_array_find_environ(mawk_state_t *MAWK, mawk_array_t arr, const mawk_cell_t *idx, mawk_cell_t *result, int create) +{ + chkenv(MAWK, arr); + if ((create) && ((arr->state.i & ST_MODIFIED) == 0)) { + /* we are allowed to create the item in a so-far-unmodified ENVIRON[]: + check if the item exists and mark the array modified only if it + really would be created - this keeps the array un-modified after + read-only operations + NOTE: maybe idx == result, so we don't want result to be destroyed + in this call, thus result is NULL.
+ */ + if (mawk_array_orig_imp.find(MAWK, arr, idx, NULL, 0) < 1) + arr->state.i |= ST_MODIFIED; + } + return mawk_array_orig_imp.find(MAWK, arr, idx, result, create); +} + + +void mawk_array_set_environ(mawk_state_t *MAWK, mawk_array_t arr, const mawk_cell_t *idx, mawk_cell_t *val) +{ + chkenv(MAWK, arr); + arr->state.i |= ST_MODIFIED; + mawk_array_orig_imp.set(MAWK, arr, idx, val); +} + + +static void mawk_array_delete_environ(mawk_state_t *MAWK, mawk_array_t arr, const mawk_cell_t *idx) +{ + chkenv(MAWK, arr); + arr->state.i |= ST_MODIFIED; + mawk_array_orig_imp.delet(MAWK, arr, idx); +} + +static mawk_string_t **mawk_array_loop_vector_environ(mawk_state_t *MAWK, mawk_array_t A, unsigned *size) +{ + chkenv(MAWK, A); + /* could use the _generic() as well, but that'd be a tiny bit slower and + there's no side effect other than chkenv() */ +/* return mawk_array_loop_vector_generic(MAWK, A, size);*/ + return mawk_array_orig_imp.loop_vect(MAWK, A, size); +} + +/* mark array initialized before clearing, so if a delete ENVIRON + precedes any other call, the environment is not loaded at all.
*/ +void mawk_array_clear_environ(mawk_state_t *MAWK, mawk_array_t arr) +{ + arr->state.i |= ST_LOADED | ST_MODIFIED; + mawk_array_clear_generic(MAWK, arr); +} + +array_imp_t mawk_array_environ_imp = { + mawk_array_find_environ, + mawk_array_set_environ, + mawk_array_delete_environ, + + mawk_array_clear_environ, + mawk_array_loop_vector_environ, + mawk_array_load_generic, + + mawk_array_it_start_orig, + mawk_array_it_next_orig, + mawk_array_it_stop_orig +}; + + +void mawk_environ_init(mawk_state_t *MAWK) +{ + SYMTAB *s; + s = mawk_insert(MAWK, "ENVIRON"); + s->type = ST_ARRAY; + s->stval.array = mawk_array_new(MAWK, &mawk_array_environ_imp); +} + +#ifdef MAWK_MEM_PEDANTIC +void mawk_environ_uninit(mawk_state_t *MAWK) +{ + SYMTAB *s; + s = mawk_find(MAWK, "ENVIRON", 0); + if (s != NULL) { + if (s->type == ST_ARRAY) + mawk_array_destroy(MAWK, s->stval.array); + mawk_delete(MAWK, "ENVIRON", 0); + } +} +#endif + +extern char **environ; +#define grow() \ +do { \ + if (used >= alloced) { \ + alloced += 128; \ + ret = mawk_realloc(MAWK, ret, sizeof(char *) * alloced); \ + } \ +} while(0) + +char **mawk_environ_extract(mawk_state_t *MAWK) +{ + void *it; + const mawk_cell_t *idx; + char **ret = NULL; + unsigned used = 0, alloced = 0; + mawk_array_t arr; + SYMTAB *s; + + s = mawk_find(MAWK, "ENVIRON", 0); + /* ENVIRON[] is not set up - use the global environ[] */ + if (s == NULL) + return environ; + + arr = s->stval.array; + + /* ENVIRON[] is not loaded - use the global environ[] */ + if ((arr->state.i & ST_LOADED) == 0) + return environ; + + /* ENVIRON[] is not modified - use the global environ[], cheaper than to copy */ + if ((arr->state.i & ST_MODIFIED) == 0) + return environ; + + + it = arr->imp.it_start(MAWK, arr); + for(;;) { + char *si, *sv; + mawk_cell_t cv; + int sil, svl, found; + cv.type = C_NOINIT; /* the orig find() destroys *result before filling it, so it must start out as a valid cell */ + idx = arr->imp.it_next(MAWK, arr, it); + if (idx == NULL) + break; + si = string(idx)->str; + found = mawk_array_find_environ(MAWK, arr, idx, &cv, 0) > 0;
+ if (found && (cv.type < C_STRING)) /* value is not a string yet, e.g. after ENVIRON["N"] = 5 */ + mawk_cast1_to_str(MAWK, &cv); + sv = found ? string(&cv)->str : ""; + grow(); + sil = strlen(si); + svl = strlen(sv); + ret[used] = mawk_malloc(MAWK, sil+svl+2); + memcpy(ret[used], si, sil); + ret[used][sil] = '='; + memcpy(ret[used]+sil+1, sv, svl+1); + used++; + mawk_cell_destroy(MAWK, &cv); /* sv has been copied into ret[]; release the cell copied by find() */ + } + + grow(); + ret[used] = NULL; + used++; + + arr->imp.it_stop(MAWK, arr, it); + return ret; +} + diff --git a/src/libmawk/array_environ.h b/src/libmawk/array_environ.h new file mode 100644 index 0000000..69647a2 --- /dev/null +++ b/src/libmawk/array_environ.h @@ -0,0 +1,17 @@ +/* create a blank array reserved for ENVIRON[] */ +void mawk_environ_init(mawk_state_t *MAWK); + +#ifdef MAWK_MEM_PEDANTIC +void mawk_environ_uninit(mawk_state_t *MAWK); +#endif + +/* allocate memory and return an array suitable for env arg of execle(); + allocation is done using mawk_malloc() to ensure a large ENVIRON[] doesn't + cause excess memory allocation (ram limit!). However, this allocation + is not to be free'd ever, since an exec() or exit() follows the call. + Thus if there's no change to the ENVIRON[] array, a pointer to + environ is returned and no allocation is done. + + Returns NULL on error +*/ +char **mawk_environ_extract(mawk_state_t *MAWK); diff --git a/src/libmawk/array_environ_off.c b/src/libmawk/array_environ_off.c new file mode 100644 index 0000000..e867e50 --- /dev/null +++ b/src/libmawk/array_environ_off.c @@ -0,0 +1,4 @@ +#include "mawk.h" +void mawk_environ_init(mawk_state_t *MAWK) +{ +} diff --git a/src/libmawk/array_generic.c b/src/libmawk/array_generic.c new file mode 100644 index 0000000..91c38cc --- /dev/null +++ b/src/libmawk/array_generic.c @@ -0,0 +1,90 @@ +/* +libmawk changes (C) 2014, Tibor 'Igor2' Palinkas; + +This is a source file for mawk, an implementation of +the AWK programming language. + +This file implements generic virtual array calls clear, loop_vector and load. +These calls rely on the low level find/set/iterator implementation of the +custom array and save the time for the implementor writing these functions.
+In return these functions are not fast at all. +*/ + +#include "mawk.h" +#include "zmalloc.h" +#include "memory.h" +#include "split.h" + +void mawk_array_clear_generic(mawk_state_t *MAWK, mawk_array_t arr) +{ + void *it; + const mawk_cell_t *idx; + + if (arr->imp.it_start == NULL) + mawk_bozo(MAWK, "mawk_array_clear_generic() on an array without iterator"); + if (arr->imp.delet == NULL) + mawk_bozo(MAWK, "mawk_array_clear_generic() on an array without delet"); + + it = arr->imp.it_start(MAWK, arr); + for(;;) { + idx = arr->imp.it_next(MAWK, arr, it); + if (idx == NULL) + break; + arr->imp.delet(MAWK, arr, idx); + } + arr->imp.it_stop(MAWK, arr, it); +} + +mawk_string_t **mawk_array_loop_vector_generic(mawk_state_t *MAWK, mawk_array_t arr, unsigned *size) +{ + void *it; + const mawk_cell_t *idx; + mawk_string_t **ret = NULL; + unsigned used = 0, alloced = 0; + + if (arr->imp.it_start == NULL) + mawk_bozo(MAWK, "mawk_array_loop_vector_generic() on an array without iterator"); + + it = arr->imp.it_start(MAWK, arr); + for(;;) { + idx = arr->imp.it_next(MAWK, arr, it); + if (idx == NULL) + break; + if (used >= alloced) { + alloced += 128; + ret = mawk_realloc(MAWK, ret, sizeof(mawk_string_t *) * alloced); + } + ret[used] = idx->ptr; + ret[used]->ref_cnt++; + used++; + } + arr->imp.it_stop(MAWK, arr, it); + *size = used; + return ret; +} + +void mawk_array_load_generic(mawk_state_t *MAWK, mawk_array_t arr, int cnt) +{ + mawk_cell_t cidx, cval; + + if (arr->imp.set == NULL) + mawk_bozo(MAWK, "mawk_array_load_generic() on an array without set"); + + if (arr->imp.clear != NULL) + arr->imp.clear(MAWK, arr); + else + mawk_array_clear_generic(MAWK, arr); + + cidx.type = C_NUM; + cval.type = C_STRING; + +#define action(idx, sval) \ + cidx.d.dval = idx+1; \ + cval.ptr = (PTR) sval; \ + arr->imp.set(MAWK, arr, &cidx, &cval); + + mawk_split_walk(MAWK, cnt, 1, action); + +#undef action + +} diff --git a/src/libmawk/array_generic.h b/src/libmawk/array_generic.h new file mode 100644 
index 0000000..0ed8a2e --- /dev/null +++ b/src/libmawk/array_generic.h @@ -0,0 +1,4 @@ +void mawk_array_clear_generic(mawk_state_t *, mawk_array_t); +mawk_string_t **mawk_array_loop_vector_generic(mawk_state_t *, mawk_array_t, unsigned *vsize); +void mawk_array_load_generic(mawk_state_t *, mawk_array_t, int cnt); + diff --git a/src/libmawk/array_orig.c b/src/libmawk/array_orig.c new file mode 100644 index 0000000..ad3b7e6 --- /dev/null +++ b/src/libmawk/array_orig.c @@ -0,0 +1,684 @@ +/* +libmawk changes (C) 2009-2014, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991-96, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +*/ + +/* +This file had been originally generated with the command + + notangle -R'"array.c"' array.w > array.c + +but is maintained as .c in libmawk. + +(Notangle is part of Norman Ramsey's noweb literate programming package +available from CTAN(ftp.shsu.edu)). 
+*/ +#include +#include +#include "mawk.h" +#include "symtype.h" +#include "memory.h" +#include "zmalloc.h" +#include "field.h" +#include "bi_vars.h" +#include "num.h" +#include "array_orig.h" +#include "cell.h" +#include "split.h" + +struct anode; +typedef struct { + struct anode *slink, *ilink; +} DUAL_LINK; + +typedef struct anode { + struct anode *slink; + struct anode *ilink; + mawk_string_t *sval; + unsigned hval; + Int ival; + mawk_cell_t cell; +} ANODE; + + +#define NOT_AN_IVALUE (-Max_Int-1) /* usually 0x80000000 */ + +#define STARTING_HMASK 63 /* 2^6-1, must have form 2^n-1 */ +#define MAX_AVE_LIST_LENGTH 12 +#define hmask_to_limit(x) (((x)+1)*MAX_AVE_LIST_LENGTH) + +static ANODE *mawk_find_by_ival(mawk_state_t *, mawk_array_t, Int, int); +static ANODE *mawk_find_by_sval(mawk_state_t *, mawk_array_t, mawk_string_t *, int); +static void add_string_associations(mawk_state_t *, mawk_array_t); +static void make_empty_table(mawk_state_t *, mawk_array_t, int); +static void convert_split_array_to_table(mawk_state_t *, mawk_array_t); +static void double_the_hash_table(mawk_state_t *, mawk_array_t); +static unsigned ahash(mawk_string_t *); + + +mawk_cell_t *mawk_array_find_orig_(mawk_state_t *MAWK, mawk_array_t A, const mawk_cell_t *cp, int create_flag) +{ + ANODE *ap; + if (A->size == 0 && !create_flag) + /* eliminating this trivial case early avoids unnecessary conversions later */ + return (mawk_cell_t *) 0; + switch (cp->type) { + case C_NUM: + { + mawk_num_t d = cp->d.dval; + Int ival = mawk_d_to_I(d); + if ((mawk_num_t) ival == d) { + if (A->type == AY_SPLIT) { + if (ival >= 1 && ival <= A->size) + return (mawk_cell_t *) A->ptr + (ival - 1); + if (!create_flag) + return (mawk_cell_t *) 0; + convert_split_array_to_table(MAWK, A); + } + else if (A->type == AY_NULL) { + make_empty_table(MAWK, A, AY_INT); + if (A->ptr == NULL) + return NULL; + } + ap = mawk_find_by_ival(MAWK, A, ival, create_flag); + } + else { + /* convert to string */ + char buff[260]; + 
mawk_string_t *sval; + sprintf(buff, string(MAWK_CONVFMT)->str, d); + sval = mawk_new_STRING(MAWK, buff); + ap = mawk_find_by_sval(MAWK, A, sval, create_flag); + free_STRING(sval); + } + } + + break; + case C_NOINIT: + ap = mawk_find_by_sval(MAWK, A, &(MAWK->null_str), create_flag); + break; + default: + ap = mawk_find_by_sval(MAWK, A, string(cp), create_flag); + break; + } + return ap ? &ap->cell : (mawk_cell_t *) 0; +} + +static int mawk_array_find_orig(mawk_state_t *MAWK, mawk_array_t A, const mawk_cell_t *idx, mawk_cell_t *result, int create) +{ + mawk_cell_t *member; + + member = mawk_array_find_orig_(MAWK, A, idx, create); + if (result != NULL) { + mawk_cell_destroy(MAWK, result); + result->type = C_NOINIT; + } + if (member == NULL) + return 0; + if (result != NULL) + mawk_cellcpy(MAWK, result, member); + return 1; +} + +void mawk_array_set_orig(mawk_state_t *MAWK, mawk_array_t arr, const mawk_cell_t *idx, mawk_cell_t *val) +{ + mawk_cell_t *member; + member = mawk_array_find_orig_(MAWK, arr, idx, MAWK_CREATE); + mawk_cell_destroy(MAWK, member); + mawk_cellcpy(MAWK, member, val); +} + + +static void mawk_array_delete_orig(mawk_state_t *MAWK, mawk_array_t A, const mawk_cell_t *cp) +{ + ANODE *ap; + if (A->size == 0) + return; + switch (cp->type) { + case C_NUM: + { + mawk_num_t d = cp->d.dval; + Int ival = mawk_d_to_I(d); + if ((mawk_num_t) ival == d) { + if (A->type == AY_SPLIT) { + if (ival >= 1 && ival <= A->size) { + convert_split_array_to_table(MAWK, A); + } + else + return; /* ival not in range */ + } + ap = mawk_find_by_ival(MAWK, A, ival, NO_MAWK_CREATE); + if (ap) { /* remove from the front of the ilist */ + DUAL_LINK *table = (DUAL_LINK *) A->ptr; + table[ap->ival & A->hmask].ilink = ap->ilink; + if (ap->sval) { + ANODE *p, *q = 0; + int index = ap->hval & A->hmask; + p = table[index].slink; + while (p != ap) { + q = p; + p = q->slink; + } + if (q) + q->slink = p->slink; + else + table[index].slink = p->slink; + free_STRING(ap->sval); + } + + 
mawk_cell_destroy(MAWK, &ap->cell); + MAWK_ZFREE(MAWK, ap); + if (--A->size == 0) + mawk_array_clear(MAWK, A); + + + } + return; + } + + else { /* get the string value */ + char buff[260]; + mawk_string_t *sval; + sprintf(buff, string(MAWK_CONVFMT)->str, d); + sval = mawk_new_STRING(MAWK, buff); + ap = mawk_find_by_sval(MAWK, A, sval, NO_MAWK_CREATE); + free_STRING(sval); + } + } + break; + case C_NOINIT: + ap = mawk_find_by_sval(MAWK, A, &(MAWK->null_str), NO_MAWK_CREATE); + break; + default: + ap = mawk_find_by_sval(MAWK, A, string(cp), NO_MAWK_CREATE); + break; + } + if (ap) { /* remove from the front of the slist */ + DUAL_LINK *table = (DUAL_LINK *) A->ptr; + table[ap->hval & A->hmask].slink = ap->slink; + if (ap->ival != NOT_AN_IVALUE) { + ANODE *p, *q = 0; + int index = ap->ival & A->hmask; + p = table[index].ilink; + while (p != ap) { + q = p; + p = q->ilink; + } + if (q) + q->ilink = p->ilink; + else + table[index].ilink = p->ilink; + } + + free_STRING(ap->sval); + mawk_cell_destroy(MAWK, &ap->cell); + MAWK_ZFREE(MAWK, ap); + if (--A->size == 0) + mawk_array_clear(MAWK, A); + } +} + + +static void mawk_array_load_orig(mawk_state_t *MAWK, mawk_array_t A, int cnt) +{ + mawk_cell_t *cells; /* storage for A[1..cnt] */ + int i; /* index into cells[] */ + + /* destroy the original array and set up the new */ + if (A->type != AY_SPLIT || A->limit < cnt) { + mawk_array_clear(MAWK, A); + A->limit = (cnt & ~3) + 4; + A->ptr = mawk_zmalloc(MAWK, A->limit * sizeof(mawk_cell_t)); + A->type = AY_SPLIT; + } + else + for (i = 0; i < A->size; i++) + mawk_cell_destroy(MAWK, (mawk_cell_t *) A->ptr + i); + + cells = (mawk_cell_t *) A->ptr; + A->size = cnt; + +#define action(idx, sval) \ + cells[idx].type = C_MBSTRN; \ + cells[idx].ptr = (PTR) sval; + + mawk_split_walk(MAWK, cnt, 1, action); + +#undef action +} + +static void mawk_array_clear_orig(mawk_state_t *MAWK, mawk_array_t A) +{ + int i; + ANODE *p, *q; + if (A->type == AY_SPLIT) { + for (i = 0; i < A->size; i++) + 
mawk_cell_destroy(MAWK, (mawk_cell_t *) A->ptr + i); + mawk_zfree(MAWK, A->ptr, A->limit * sizeof(mawk_cell_t)); + } + else if (A->type & AY_STR) { + DUAL_LINK *table = (DUAL_LINK *) A->ptr; + for (i = 0; i <= A->hmask; i++) { + p = table[i].slink; + while (p) { + q = p; + p = q->slink; + free_STRING(q->sval); + mawk_cell_destroy(MAWK, &q->cell); + MAWK_ZFREE(MAWK, q); + } + } + mawk_zfree(MAWK, A->ptr, (A->hmask + 1) * sizeof(DUAL_LINK)); + } + else if (A->type & AY_INT) { + DUAL_LINK *table = (DUAL_LINK *) A->ptr; + for (i = 0; i <= A->hmask; i++) { + p = table[i].ilink; + while (p) { + q = p; + p = q->ilink; + mawk_cell_destroy(MAWK, &q->cell); + MAWK_ZFREE(MAWK, q); + } + } + mawk_zfree(MAWK, A->ptr, (A->hmask + 1) * sizeof(DUAL_LINK)); + } + mawk_array_clear_common(MAWK, A); +} + + + +static mawk_string_t **mawk_array_loop_vector_orig(mawk_state_t *MAWK, mawk_array_t A, unsigned *sizep) +{ + mawk_string_t **ret; + *sizep = A->size; + if (A->size > 0) { + if (!(A->type & AY_STR)) + add_string_associations(MAWK, A); + ret = (mawk_string_t **) mawk_malloc(MAWK, A->size * sizeof(mawk_string_t *)); + { + int r = 0; /* indexes ret */ + DUAL_LINK *table = (DUAL_LINK *) A->ptr; + int i; /* indexes table */ + ANODE *p; /* walks slists */ + for (i = 0; i <= A->hmask; i++) { + for (p = table[i].slink; p; p = p->slink) { + ret[r++] = p->sval; + p->sval->ref_cnt++; + } + } + } + + return ret; + } + else + return (mawk_string_t **) 0; +} + +static ANODE *mawk_find_by_ival(mawk_state_t *MAWK, mawk_array_t A, Int ival, int create_flag) +{ + DUAL_LINK *table = (DUAL_LINK *) A->ptr; + unsigned index = ival & A->hmask; + ANODE *p = table[index].ilink; /* walks ilist */ + ANODE *q = (ANODE *) 0; /* trails p */ + while (1) { + if (!p) { + /* search failed */ + if (A->type & AY_STR) { + /* need to search by string */ + char buff[256]; + mawk_string_t *sval; + sprintf(buff, INT_FMT, ival); + sval = mawk_new_STRING(MAWK, buff); + p = mawk_find_by_sval(MAWK, A, sval, create_flag); + 
free_STRING(sval); + if (!p) + return (ANODE *) 0; + } + else if (create_flag) { + p = MAWK_ZMALLOC(MAWK, ANODE); + p->sval = (mawk_string_t *) 0; + p->cell.type = C_NOINIT; + if (++A->size > A->limit) { + double_the_hash_table(MAWK, A); /* changes table, may change index */ + table = (DUAL_LINK *) A->ptr; + index = A->hmask & ival; + } + } + else + return (ANODE *) 0; + p->ival = ival; + A->type |= AY_INT; + + break; + } + else if (p->ival == ival) { + /* found it, now move to the front */ + if (!q) /* already at the front */ + return p; + /* delete for mawk_insertion at the front */ + q->ilink = p->ilink; + break; + } + q = p; + p = q->ilink; + } + /* mawk_insert at the front */ + p->ilink = table[index].ilink; + table[index].ilink = p; + return p; +} + +static ANODE *mawk_find_by_sval(mawk_state_t *MAWK, mawk_array_t A, mawk_string_t *sval, int create_flag) +{ + unsigned hval = ahash(sval); + char *str = sval->str; + DUAL_LINK *table; + int index; + ANODE *p; /* walks list */ + ANODE *q = (ANODE *) 0; /* trails p */ + if (!(A->type & AY_STR)) + add_string_associations(MAWK, A); + table = (DUAL_LINK *) A->ptr; + index = hval & A->hmask; + p = table[index].slink; + while (1) { + if (!p) { + if (create_flag) { + { + p = MAWK_ZMALLOC(MAWK, ANODE); + p->sval = sval; + sval->ref_cnt++; + p->ival = NOT_AN_IVALUE; + p->hval = hval; + p->cell.type = C_NOINIT; + if (++A->size > A->limit) { + double_the_hash_table(MAWK, A); /* changes table, may change index */ + table = (DUAL_LINK *) A->ptr; + index = hval & A->hmask; + } + } + + break; + } + else + return (ANODE *) 0; + } + else if (p->hval == hval && strcmp(p->sval->str, str) == 0) { + /* found */ + if (!q) /* already at the front */ + return p; + else { /* delete for move to the front */ + q->slink = p->slink; + break; + } + } + q = p; + p = q->slink; + } + p->slink = table[index].slink; + table[index].slink = p; + return p; +} + +static void add_string_associations(mawk_state_t *MAWK, mawk_array_t A) +{ + if (A->type 
== AY_NULL) + make_empty_table(MAWK, A, AY_STR); + else { + DUAL_LINK *table; + int i; /* walks table */ + ANODE *p; /* walks ilist */ + char buff[256]; + if (A->type == AY_SPLIT) + convert_split_array_to_table(MAWK, A); + table = (DUAL_LINK *) A->ptr; + for (i = 0; i <= A->hmask; i++) { + p = table[i].ilink; + while (p) { + sprintf(buff, INT_FMT, p->ival); + p->sval = mawk_new_STRING(MAWK, buff); + p->hval = ahash(p->sval); + p->slink = table[A->hmask & p->hval].slink; + table[A->hmask & p->hval].slink = p; + p = p->ilink; + } + } + A->type |= AY_STR; + } +} + +static void make_empty_table(mawk_state_t *MAWK, mawk_array_t A, int type) +{ + /* type -> AY_INT or AY_STR */ + size_t sz = (STARTING_HMASK + 1) * sizeof(DUAL_LINK); + + A->ptr = mawk_zmalloc(MAWK, sz); + if (A->ptr != NULL) { + A->type = type; + A->hmask = STARTING_HMASK; + A->limit = hmask_to_limit(STARTING_HMASK); + memset(A->ptr, 0, sz); + } + else { + A->limit = 0; + } +} + +static void convert_split_array_to_table(mawk_state_t *MAWK, mawk_array_t A) +{ + mawk_cell_t *cells = (mawk_cell_t *) A->ptr; + int i; /* walks cells */ + DUAL_LINK *table; + int j; /* walks table */ + unsigned entry_limit = A->limit; + A->hmask = STARTING_HMASK; + A->limit = hmask_to_limit(STARTING_HMASK); + while (A->size > A->limit) { + A->hmask = (A->hmask << 1) + 1; /* double the size */ + A->limit = hmask_to_limit(A->hmask); + } + { + size_t sz = (A->hmask + 1) * sizeof(DUAL_LINK); + A->ptr = memset(mawk_zmalloc(MAWK, sz), 0, sz); + table = (DUAL_LINK *) A->ptr; + } + + + /* mawk_insert each cells[i] in the new mawk_hash table on an ilist */ + for (i = 0, j = 1; i < A->size; i++) { + ANODE *p = MAWK_ZMALLOC(MAWK, ANODE); + p->sval = (mawk_string_t *) 0; + p->ival = i + 1; + p->cell = cells[i]; + p->ilink = table[j].ilink; + table[j].ilink = p; + j++; + j &= A->hmask; + } + A->type = AY_INT; + mawk_zfree(MAWK, cells, entry_limit * sizeof(mawk_cell_t)); +} + +static void double_the_hash_table(mawk_state_t *MAWK, mawk_array_t 
A) +{ + unsigned old_hmask = A->hmask; + unsigned new_hmask = (old_hmask << 1) + 1; + DUAL_LINK *table; + A->ptr = mawk_zrealloc(MAWK, A->ptr, (old_hmask + 1) * sizeof(DUAL_LINK), (new_hmask + 1) * sizeof(DUAL_LINK)); + table = (DUAL_LINK *) A->ptr; + /* zero out the new part which is the back half */ + memset(&table[old_hmask + 1], 0, (old_hmask + 1) * sizeof(DUAL_LINK)); + + if (A->type & AY_STR) { + int i; /* index to old lists */ + int j; /* index to new lists */ + ANODE *p; /* walks an old list */ + ANODE *q; /* trails p for deletion */ + ANODE *tail; /* builds new list from the back */ + ANODE dummy0, dummy1; + for (i = 0, j = old_hmask + 1; i <= old_hmask; i++, j++) { + q = &dummy0; + q->slink = p = table[i].slink; + tail = &dummy1; + while (p) { + if ((p->hval & new_hmask) != i) { /* move it */ + q->slink = p->slink; + tail = tail->slink = p; + } + else + q = p; + p = q->slink; + } + table[i].slink = dummy0.slink; + tail->slink = (ANODE *) 0; + table[j].slink = dummy1.slink; + } + + } + + if (A->type & AY_INT) { + int i; /* index to old lists */ + int j; /* index to new lists */ + ANODE *p; /* walks an old list */ + ANODE *q; /* trails p for deletion */ + ANODE *tail; /* builds new list from the back */ + ANODE dummy0, dummy1; + for (i = 0, j = old_hmask + 1; i <= old_hmask; i++, j++) { + q = &dummy0; + q->ilink = p = table[i].ilink; + tail = &dummy1; + while (p) { + if ((p->ival & new_hmask) != i) { /* move it */ + q->ilink = p->ilink; + tail = tail->ilink = p; + } + else + q = p; + p = q->ilink; + } + table[i].ilink = dummy0.ilink; + tail->ilink = (ANODE *) 0; + table[j].ilink = dummy1.ilink; + } + + } + + A->hmask = new_hmask; + A->limit = hmask_to_limit(new_hmask); +} + + +static unsigned ahash(mawk_string_t *sval) +{ + unsigned sum1 = sval->len; + unsigned sum2 = sum1; + unsigned char *p, *q; + if (sum1 <= 10) { + for (p = (unsigned char *) sval->str; *p; p++) { + sum1 += sum1 + *p; + sum2 += sum1; + } + } + else { + int cnt = 5; + p = (unsigned char 
*) sval->str; /* p starts at the front */ + q = (unsigned char *) sval->str + (sum1 - 1); /* q starts at the back */ + while (cnt) { + cnt--; + sum1 += sum1 + *p; + sum2 += sum1; + sum1 += sum1 + *q; + sum2 += sum1; + p++; + q--; + } + } + return sum2; +} + + +/* iterator for array_generic (for array implementations built on top of array_orig) */ +typedef struct { + int i; /* indexes table */ + ANODE *p; /* walks slists */ + mawk_cell_t ridx; /* cell handed out by mawk_array_it_next_orig(); rewritten on every step */ +} array_it_orig_t; +/* Start an iteration over A: a non-empty array that lacks AY_STR gets its string lists built first, then an iterator positioned before the first table slot is allocated. Returns NULL when the allocation fails. */ +void *mawk_array_it_start_orig(mawk_state_t *MAWK, mawk_array_t A) +{ + array_it_orig_t *it; + + if (A->size > 0) { + if (!(A->type & AY_STR)) + add_string_associations(MAWK, A); + } + + it = mawk_zmalloc(MAWK, sizeof(array_it_orig_t)); + if (it == NULL) + return NULL; + it->i = -1; + it->p = NULL; + it->ridx.type = C_STRING; + return it; +} +/* Advance the iterator: returns the iterator's own C_STRING cell with ptr set to the string index of the next element, or NULL when A is empty or no list entries are left. The index string's ref_cnt is not touched here. */ +const mawk_cell_t *mawk_array_it_next_orig(mawk_state_t *MAWK, mawk_array_t A, void *iterator) +{ + DUAL_LINK *table = (DUAL_LINK *) A->ptr; + array_it_orig_t *it = iterator; + + if (A->size <= 0) + return NULL; + + if ((it->i >= 0) && (it->i > A->hmask)) /* i is still -1 before the first step */ + return NULL; + + if (it->p == NULL) { + /* this slot is over */ + for(;;) { + it->i++; /* check next slot in the array of linked lists */ + if (it->i > A->hmask) + return NULL; /* hit end of table */ + it->p = table[it->i].slink; + if (it->p != NULL) + break; /* found an entry, go on returning it */ + } + } + + it->ridx.ptr = it->p->sval; + it->p = it->p->slink; + return &it->ridx; +} +/* Finish an iteration: give the iterator allocated by mawk_array_it_start_orig() back to the allocator. */ +void mawk_array_it_stop_orig(mawk_state_t *MAWK, mawk_array_t A, void *iterator) +{ + array_it_orig_t *it = iterator; + mawk_zfree(MAWK, it, sizeof(array_it_orig_t)); +} + + +/* public implementation array */ +array_imp_t mawk_array_orig_imp = { + mawk_array_find_orig, + mawk_array_set_orig, + mawk_array_delete_orig, + + mawk_array_clear_orig, + mawk_array_loop_vector_orig, + mawk_array_load_orig, + + mawk_array_it_start_orig, + mawk_array_it_next_orig, + mawk_array_it_stop_orig +}; + + diff --git a/src/libmawk/array_orig.h 
b/src/libmawk/array_orig.h new file mode 100644 index 0000000..92102fc --- /dev/null +++ b/src/libmawk/array_orig.h @@ -0,0 +1,20 @@ +/* +libmawk (C) 2014, Tibor 'Igor2' Palinkas; + +libMawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +*/ + +#ifndef ARRAY_ORIG_H +#define ARRAY_ORIG_H 1 + +/* the original mawk array implementation, large, cryptic and well optimized */ +extern array_imp_t mawk_array_orig_imp; + + +/* iterator - used by custom array implementations built on top of array_orig */ +void *mawk_array_it_start_orig(mawk_state_t *MAWK, mawk_array_t A); +const mawk_cell_t *mawk_array_it_next_orig(mawk_state_t *MAWK, mawk_array_t A, void *iterator); +void mawk_array_it_stop_orig(mawk_state_t *MAWK, mawk_array_t A, void *iterator); + +#endif /* ARRAY_ORIG_H */ diff --git a/src/libmawk/bi_funct.c b/src/libmawk/bi_funct.c new file mode 100644 index 0000000..1dccdb5 --- /dev/null +++ b/src/libmawk/bi_funct.c @@ -0,0 +1,1099 @@ + +/******************************************** +bi_funct.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ +#include "mawk.h" +#include "bi_funct.h" +#include "bi_vars.h" +#include "memory.h" +#include "init.h" +#include "files.h" +#include "fin.h" +#include "field.h" +#include "regexp.h" +#include "repl.h" +#include "num.h" +#include "math_wrap.h" +#include "vio.h" +#include "cell.h" +#include +#include + + +/* statics */ +static mawk_string_t *gsub(mawk_state_t *, PTR, mawk_cell_t *, char *, char *, int); +static void fplib_err(mawk_state_t *, char *, mawk_num_t, char *); + +/**************************************************************** + string builtins (except split (in split.c) and [g]sub (at end)) + ****************************************************************/ + +mawk_cell_t *mawk_bi_length(mawk_state_t *MAWK, register mawk_cell_t *sp) +{ + unsigned len; + + if (sp->type == 0) + mawk_cellcpy(MAWK, sp, MAWK->field); + else + sp--; + + if (sp->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + len = string(sp)->len; + + free_STRING(string(sp)); + sp->type = C_NUM; + sp->d.dval = (double) len; + + return sp; +} + +char *mawk_str_str(register char *target, char *key, unsigned key_len) +{ + register int k = key[0]; + + switch (key_len) { + case 0: + return (char *) 0; + case 1: + return strchr(target, k); + case 2: + { + int k1 = key[1]; + while ((target = strchr(target, k))) + if (target[1] == k1) + return target; + else + target++; + /*failed */ + return (char *) 0; + } + } + + key_len--; + while ((target = strchr(target, k))) { + if (strncmp(target + 1, key + 1, key_len) == 0) + return target; + else + target++; + } + /*failed */ + return (char *) 0; +} + + + +mawk_cell_t *mawk_bi_index(mawk_state_t *MAWK, register mawk_cell_t *sp) +{ + register int idx; + unsigned len; + char *p; + + sp--; + if (TEST2(sp) != TWO_STRINGS) + mawk_cast2_to_str(MAWK, sp); + + if ((len = string(sp + 1)->len)) + idx = (p = mawk_str_str(string(sp)->str, string(sp + 1)->str, len)) + ? 
p - string(sp)->str + 1 : 0; + + else /* index of the empty string */ + idx = 1; + + free_STRING(string(sp)); + free_STRING(string(sp + 1)); + sp->type = C_NUM; + sp->d.dval = (mawk_num_t) idx; + return sp; +} + +/* substr(s, i, n) + if l = length(s) then get the characters + from max(1,i) to min(l,i+n-1) inclusive; + without n (two argument form) everything from max(1,i) on is taken. + On entry sp->type is the argument count; the result replaces s in + its cell and that cell is returned. */ + +mawk_cell_t *mawk_bi_substr(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + int n_args, len; + register int i, n; + mawk_string_t *sval; /* substr(sval->str, i, n) */ + + n_args = sp->type; + sp -= n_args; + if (sp->type != C_STRING) + mawk_cast1_to_str(MAWK, sp); + /* don't use < C_STRING shortcut */ + sval = string(sp); + + if ((len = sval->len) == 0) { /* substr on null string */ + if (n_args == 3) { + mawk_cell_destroy(MAWK, sp + 2); + } + mawk_cell_destroy(MAWK, sp + 1); + return sp; + } + + if (n_args == 2) { + n = MAX__INT; + if (sp[1].type != C_NUM) + mawk_cast1_to_num(MAWK, sp + 1); + } + else { + if (TEST2(sp + 1) != TWO_NUMS) + mawk_cast2_to_num(MAWK, sp + 1); + n = d_to_i(sp[2].d.dval); + } + i = d_to_i(sp[1].d.dval) - 1; /* i now indexes into string */ + + if (i < 0) { + /* positions in front of the string use up part of the requested + length: substr("hello", 0, 2) is "h", not "he". An omitted + length stays unbounded; n <= 0 gives the null string anyway, + so it is left alone (this also keeps n + i from overflowing) */ + if (n_args == 3 && n > 0) + n += i; + i = 0; + } + + if (n > len - i) + n = len - i; + + if (n <= 0) { /* the null string */ + sp->ptr = (PTR) & (MAWK->null_str); + (MAWK->null_str.ref_cnt)++; + } + else { /* got something */ + + sp->ptr = (PTR) mawk_new_STRING0(MAWK, n); + memcpy(string(sp)->str, sval->str + i, n); + } + + free_STRING(sval); + return sp; +} + +/* + match(s,r) + sp[0] holds r, sp[-1] holds s +*/ + +mawk_cell_t *mawk_bi_match(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + char *p; + unsigned length; + + if (sp->type != C_RE) + mawk_cast_to_RE(MAWK, sp); + if ((--sp)->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + + mawk_cell_destroy(MAWK, RSTART); + mawk_cell_destroy(MAWK, RLENGTH); + RSTART->type = C_NUM; + RLENGTH->type = C_NUM; + + p = mawk_REmatch(MAWK, string(sp)->str, (sp + 1)->ptr, &length, 0); + + if (p) { + sp->d.dval = (mawk_num_t) (p - string(sp)->str + 1); + RLENGTH->d.dval = (mawk_num_t) 
length; + } + else { + sp->d.dval = MAWK_NUM_ZERO; + RLENGTH->d.dval = -MAWK_NUM_ONE; /* posix */ + } + + free_STRING(string(sp)); + sp->type = C_NUM; + + RSTART->d.dval = sp->d.dval; + + return sp; +} + +mawk_cell_t *mawk_bi_toupper(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + mawk_string_t *old; + register char *p, *q; + + if (sp->type != C_STRING) + mawk_cast1_to_str(MAWK, sp); + old = string(sp); + sp->ptr = (PTR) mawk_new_STRING0(MAWK, old->len); + + q = string(sp)->str; + p = old->str; + while (*p) { + *q = *p++; + if (*q >= 'a' && *q <= 'z') + *q += 'A' - 'a'; + q++; + } + free_STRING(old); + return sp; +} + +mawk_cell_t *mawk_bi_tolower(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + mawk_string_t *old; + register char *p, *q; + + if (sp->type != C_STRING) + mawk_cast1_to_str(MAWK, sp); + old = string(sp); + sp->ptr = (PTR) mawk_new_STRING0(MAWK, old->len); + + q = string(sp)->str; + p = old->str; + while (*p) { + *q = *p++; + if (*q >= 'A' && *q <= 'Z') + *q += 'a' - 'A'; + q++; + } + free_STRING(old); + return sp; +} + + +/************************************************ + arithemetic builtins + ************************************************/ + +static void fplib_err(mawk_state_t *MAWK, char *fname, mawk_num_t val, char *error) +{ + mawk_rt_error(MAWK, "%s(" NUM_FMT ") : %s", fname, val, error); +} + +#ifndef MAWK_NO_FLOAT +mawk_cell_t *mawk_bi_sin(mawk_state_t *MAWK, mawk_cell_t *sp) +{ +#if ! STDC_MATHERR + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + sp->d.dval = sin(sp->d.dval); + return sp; +#else + mawk_num_t x; + + errno = 0; + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + x = sp->d.dval; + sp->d.dval = sin(sp->d.dval); + if (errno) + fplib_err(MAWK, "sin", x, "loss of precision"); + return sp; +#endif +} + +mawk_cell_t *mawk_bi_cos(mawk_state_t *MAWK, mawk_cell_t *sp) +{ +#if ! 
STDC_MATHERR + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + sp->d.dval = cos(sp->d.dval); + return sp; +#else + mawk_num_t x; + + errno = 0; + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + x = sp->d.dval; + sp->d.dval = cos(sp->d.dval); + if (errno) + fplib_err(MAWK, "cos", x, "loss of precision"); + return sp; +#endif +} + +mawk_cell_t *mawk_bi_atan2(mawk_state_t *MAWK, mawk_cell_t *sp) +{ +#if ! STDC_MATHERR + sp--; + if (TEST2(sp) != TWO_NUMS) + mawk_cast2_to_num(MAWK, sp); + sp->d.dval = atan2(sp->d.dval, (sp + 1)->d.dval); + return sp; +#else + + errno = 0; + sp--; + if (TEST2(sp) != TWO_NUMS) + mawk_cast2_to_num(MAWK, sp); + sp->d.dval = atan2(sp->d.dval, (sp + 1)->d.dval); + if (errno) + mawk_rt_error(MAWK, "atan2(0,0) : domain error"); + return sp; +#endif +} + +mawk_cell_t *mawk_bi_log(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + mawk_num_t x; + + errno = 0; + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + x = sp->d.dval; + PM_BEGIN + sp->d.dval = P_log(sp->d.dval); + PM_ERROR +/* temporary workaround until the final nan support is there */ + sp->d.dval = P_nan(); +/* fplib_err(MAWK, "log", x, "domain error");*/ + PM_END; + return sp; +} + +mawk_cell_t *mawk_bi_exp(mawk_state_t *MAWK, mawk_cell_t *sp) +{ +#if ! 
STDC_MATHERR + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + sp->d.dval = exp(sp->d.dval); + return sp; +#else + mawk_num_t x; + + errno = 0; + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + x = sp->d.dval; + sp->d.dval = exp(sp->d.dval); + if (errno && sp->d.dval) + fplib_err(MAWK, "exp", x, "mawk_overflow"); + /* on underflow sp->d.dval==0, ignore */ + return sp; +#endif +} +#endif + +mawk_cell_t *mawk_bi_int(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + sp->d.dval = mawk_num_int(sp->d.dval); + return sp; +} + +mawk_cell_t *mawk_bi_sqrt(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + mawk_num_t x; + + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + + x = sp->d.dval; + + if (x < 0) { + sp->d.dval = P_nan(); + return sp; + } + +#if ! STDC_MATHERR + sp->d.dval = mawk_num_sqrt(x); + return sp; +#else + errno = 0; + sp->d.dval = mawk_num_sqrt(x); + if (errno) + fplib_err(MAWK, "sqrt", x, "domain error"); + return sp; +#endif +} + +#ifndef NO_TIME_H +#include +#else +#include +#endif + + +/* For portability, we'll use our own random number generator , taken + from: Park, SK and Miller KW, "Random Number Generators: + Good Ones are Hard to Find", CACM, 31, 1192-1201, 1988. 
+*/ + + +#define M 0x7fffffff /* 2^31-1 */ +#define MX 0xffffffff +#define A 16807 +#define Q 127773 /* M/A */ +#define R 2836 /* M%A */ + +#if M == MAX__LONG +#define crank(s) s = A * (s % Q) - R * (s / Q) ;\ + if ( s <= 0 ) s += M +#else +/* 64 bit longs */ +#define crank(s) { unsigned long t = s ;\ + t = (A * (t % Q) - R * (t / Q)) & MX ;\ + if ( t >= M ) t = (t+M)&M ;\ + s = t ;\ + } +#endif + + +mawk_cell_t *mawk_bi_srand(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + mawk_cell_t c; + + if (sp->type == 0) { /* seed off clock */ + mawk_cellcpy(MAWK, sp, &MAWK->cseed); + mawk_cell_destroy(MAWK, &MAWK->cseed); + MAWK->cseed.type = C_NUM; + MAWK->cseed.d.dval = (mawk_num_t) time((time_t *) 0); + } + else { /* user seed */ + + sp--; + /* swap cseed and *sp ; don't need to adjust ref_cnts */ + c = *sp; + *sp = MAWK->cseed; + MAWK->cseed = c; + } + + /* The old seed is now in *sp ; move the value in cseed to + seed in range [1,M) */ + + mawk_cellcpy(MAWK, &c, &MAWK->cseed); + if (c.type == C_NOINIT) + mawk_cast1_to_num(MAWK, &c); + + MAWK->seed = c.type == C_NUM ? 
(d_to_i(c.d.dval) & M) % M + 1 : mawk_hash(string(&c)->str) % M + 1; + if (MAWK->seed == M) + MAWK->seed = M - 1; + + mawk_cell_destroy(MAWK, &c); + + /* crank it once so close seeds don't give a close + first result */ + crank(MAWK->seed); + + return sp; +} +/* rand(): step the generator and store seed/M as a number in the cell above sp, which is returned */ +mawk_cell_t *mawk_bi_rand(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + crank(MAWK->seed); + sp++; + sp->type = C_NUM; + sp->d.dval = (mawk_num_t) MAWK->seed / (mawk_num_t) M; + return sp; +} + +#undef A +#undef M +#undef MX +#undef Q +#undef R +#undef crank + +/************************************************* + miscellaneous builtins + close, system and getline + fflush + *************************************************/ +/* close(name): the argument cell is replaced by the numeric result of mawk_file_close() */ +mawk_cell_t *mawk_bi_close(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + int x; + + if (sp->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + x = mawk_file_close(MAWK, (mawk_string_t *) sp->ptr); + free_STRING(string(sp)); + sp->type = C_NUM; + sp->d.dval = (mawk_num_t) x; + return sp; +} + +/* fflush([name]): without an argument (sp->type == 0) MAWK->fnode_stdout is flushed and the result is always 0, the return value of mawk_vio_flush() is not looked at; with a name the result is what mawk_file_flush() returns */ +mawk_cell_t *mawk_bi_fflush(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + int ret = 0; + + if (sp->type == 0) + mawk_vio_flush(MAWK, MAWK->fnode_stdout->vf); + else { + sp--; + if (sp->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + ret = mawk_file_flush(MAWK, string(sp)); + free_STRING(string(sp)); + } + + sp->type = C_NUM; + sp->d.dval = (mawk_num_t) ret; + return sp; +} +/* system(cmd): flush all output, fork, hand cmd to mawk_vio_exec_shell() in the child and replace the argument with what mawk_wait_for() reports (127 if fork() fails) */ +#ifndef MAWK_NO_FORK +mawk_cell_t *mawk_bi_system(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + int pid; + unsigned ret_val; + + if (sp->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + + mawk_flush_all_output(MAWK); + switch (pid = fork()) { + case -1: /* fork failed */ + + mawk_errmsg(MAWK, errno, "could not create a new process"); + ret_val = 127; + break; + + case 0: /* the child */ + mawk_vio_exec_shell(MAWK, string(sp)->str); + /* NOTE(review): no break here - presumably mawk_vio_exec_shell() never returns; if it can, the child falls into the wait below with pid == 0. Confirm. */ + default: /* wait for the child */ + ret_val = mawk_wait_for(MAWK, pid); + break; + } + + mawk_cell_destroy(MAWK, sp); + sp->type = C_NUM; + sp->d.dval = (mawk_num_t) ret_val; + return sp; +} +#endif + +/* 
getline() */ + +/* if type == 0 : stack is 0 , target address + + if type == F_IN : stack is F_IN, expr(filename), target address + if type == PIPE_IN : stack is PIPE_IN, target address, expr(pipename) +*/ + +mawk_cell_t *mawk_bi_getline(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + mawk_cell_t tc, *cp; + char *p; + unsigned len; + FILE_NODE *fnode = NULL; + + switch (sp->type) { + case 0: + sp--; + if (!MAWK->main_input) + mawk_FINopen_main(MAWK); + + p = mawk_FINgets(MAWK, MAWK->main_input, &len); + if (p == (void *) mawk_FIN_nomore) + goto nomore; + + if (!p) + goto eof; + + cp = (mawk_cell_t *) sp; + if (TEST2(NR) != TWO_NUMS) + mawk_cast2_to_num(MAWK, NR); + NR->d.dval += MAWK_NUM_ONE; + MAWK->rt_nr++; + FNR->d.dval += MAWK_NUM_ONE; + MAWK->rt_fnr++; + break; + + case F_IN: + sp--; + if (sp->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + fnode = mawk_file_find(MAWK, sp->ptr, F_IN, 1); + free_STRING(string(sp)); + sp--; + + if (fnode == NULL) + goto open_failure; + + p = mawk_FINgets(MAWK, fnode, &len); + if (p == (void *) mawk_FIN_nomore) + goto nomore; + + if (!p) { + mawk_file_close_(MAWK, fnode); + goto eof; + } + cp = sp; + break; + + case PIPE_IN: + sp -= 2; + if (sp->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + fnode = mawk_file_find(MAWK, sp->ptr, PIPE_IN, 1); + free_STRING(string(sp)); + + if (fnode == NULL) + goto open_failure; + + p = mawk_FINgets(MAWK, fnode, &len); + if (p == (void *) mawk_FIN_nomore) + goto nomore; + + if (!p) { + mawk_file_close_(MAWK, fnode); + goto eof; + } + cp = (sp + 1); + break; + + default: + mawk_bozo(MAWK, "type in mawk_bi_getline"); + + } + + /* we've read a line , store it */ + + if (len == 0) { + tc.type = C_STRING; + tc.ptr = (PTR) & (MAWK->null_str); + MAWK->null_str.ref_cnt++; + } + else { + tc.type = C_MBSTRN; + tc.ptr = (PTR) mawk_new_STRING0(MAWK, len); + memcpy(string(&tc)->str, p, len); + } + + mawk_bifunct_target_assign(MAWK, cp, &tc); + + mawk_cell_destroy(MAWK, &tc); + + sp->d.dval = MAWK_NUM_ONE; + 
goto done; +open_failure:; + sp->d.dval = -MAWK_NUM_ONE; + goto done; +eof:; + sp->d.dval = MAWK_NUM_ZERO; /* fall thru to done */ + +done:; + sp->type = C_NUM; + return sp; + +nomore:; + sp->type = C_REQ_NOMORE; + return sp; +} + +/********************************************** + sub() and gsub() + **********************************************/ + +/* entry: sp[0] = address of mawk_cell_t to sub on + sp[-1] = substitution CELL + sp[-2] = regular expression to match +*/ + +mawk_cell_t *mawk_bi_sub(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + mawk_cell_t *cp; /* pointer to the replacement target */ + mawk_cell_t tc; /* build the new string here */ + mawk_cell_t sc; /* copy of the target mawk_cell_t */ + char *front, *middle, *back; /* pieces */ + unsigned front_len, middle_len, back_len; + + sp -= 2; + if (sp->type != C_RE) + mawk_cast_to_RE(MAWK, sp); + if (sp[1].type != C_REPL && sp[1].type != C_REPLV) + mawk_cast_to_REPL(MAWK, sp + 1); + cp = (mawk_cell_t *) (sp + 2); + + /* make a copy of the target, because we won't change anything + including type unless the match works */ + if (cp->type == C_ARR_REF_BT) { + sc.type = C_NOINIT; + mawk_array_find(MAWK, (mawk_array_t)cp->ptr, cp->d.idx_cell, &sc, 1); + } + else + mawk_cellcpy(MAWK, &sc, cp->ptr); + + + if (sc.type < C_STRING) + mawk_cast1_to_str(MAWK, &sc); + front = string(&sc)->str; + + if ((middle = mawk_REmatch(MAWK, front, sp->ptr, &middle_len, 0))) { + front_len = middle - front; + back = middle + middle_len; + back_len = string(&sc)->len - front_len - middle_len; + + if ((sp + 1)->type == C_REPLV) { + mawk_string_t *sval = mawk_new_STRING0(MAWK, middle_len); + + memcpy(sval->str, middle, middle_len); + mawk_replv_to_repl(MAWK, sp + 1, sval); + free_STRING(sval); + } + + tc.type = C_STRING; + tc.ptr = (PTR) mawk_new_STRING0(MAWK, front_len + string(sp + 1)->len + back_len); + + { + char *p = string(&tc)->str; + + if (front_len) { + memcpy(p, front, front_len); + p += front_len; + } + if (string(sp + 1)->len) 
{ + memcpy(p, string(sp + 1)->str, string(sp + 1)->len); + p += string(sp + 1)->len; + } + if (back_len) + memcpy(p, back, back_len); + } + + mawk_bifunct_target_assign(MAWK, cp, &tc); + + free_STRING(string(&tc)); + } + + free_STRING(string(&sc)); + mawk_repl_destroy(MAWK, sp + 1); + sp->type = C_NUM; + sp->d.dval = (mawk_num_t) (middle != (char *) 0 ? MAWK_NUM_ONE : MAWK_NUM_ZERO); + return sp; +} + +/* recursive global substitution; the result is a string reference the caller has to release + dealing with empty matches makes this mildly painful +*/ + +static mawk_string_t *gsub(mawk_state_t *MAWK, PTR re, mawk_cell_t *repl, char *target, char *orig_target, int flag) +{ + /* target -> rest of the string still to be scanned; orig_target -> its very beginning (mawk_REmatch is told whether the two differ) */ + /* repl -> always of type REPL or REPLV, destroyed by caller */ + /* flag -> if on, match of empty string at front is OK */ + char *front, *middle; + mawk_string_t *back; + unsigned front_len, middle_len; + mawk_string_t *ret_val; + mawk_cell_t xrepl; /* a copy of repl so we can change repl */ + + if (!(middle = mawk_REmatch(MAWK, target, re, &middle_len, target != orig_target))) + return mawk_new_STRING(MAWK, target); /* no match */ + + mawk_cellcpy(MAWK, &xrepl, repl); + + if (!flag && middle_len == 0 && middle == target) { /* match at front that's not allowed */ + + if (*target == 0) { /* target is empty string */ + mawk_repl_destroy(MAWK, &xrepl); + MAWK->null_str.ref_cnt++; + return &(MAWK->null_str); + } + else { + char xbuff[2]; + + front_len = 0; /* front stays unset here; it is only read when front_len is non-zero */ + /* make new repl with target[0] */ + mawk_repl_destroy(MAWK, repl); + xbuff[0] = *target++; + xbuff[1] = 0; + repl->type = C_REPL; + repl->ptr = (PTR) mawk_new_STRING(MAWK, xbuff); + back = gsub(MAWK, re, &xrepl, target, orig_target, 1); + } + } + else { /* a match that counts */ + + MAWK->repl_cnt++; + + front = target; + front_len = middle - target; + + if (*middle == 0) { /* matched back of target */ + back = &(MAWK->null_str); + MAWK->null_str.ref_cnt++; + } + else + back = gsub(MAWK, re, &xrepl, middle + middle_len, orig_target, 0); + 
/* patch the &'s if needed */ + if (repl->type == C_REPLV) { + mawk_string_t *sval = mawk_new_STRING0(MAWK, middle_len); + + memcpy(sval->str, middle, middle_len); + mawk_replv_to_repl(MAWK, repl, sval); + free_STRING(sval); + } + } + + /* put the three pieces together */ + ret_val = mawk_new_STRING0(MAWK, front_len + string(repl)->len + back->len); + { + char *p = ret_val->str; + + if (front_len) { + memcpy(p, front, front_len); + p += front_len; + } + + if (string(repl)->len) { + memcpy(p, string(repl)->str, string(repl)->len); + p += string(repl)->len; + } + if (back->len) + memcpy(p, back->str, back->len); + } + + /* cleanup, repl is freed by the caller */ + mawk_repl_destroy(MAWK, &xrepl); + free_STRING(back); + + return ret_val; +} + +/* set up for call to gsub() */ +mawk_cell_t *mawk_bi_gsub(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + mawk_cell_t *cp; /* pts at the replacement target */ + mawk_cell_t sc; /* copy of replacement target */ + mawk_cell_t tc; /* build the result here */ + + sp -= 2; + if (sp->type != C_RE) + mawk_cast_to_RE(MAWK, sp); + if ((sp + 1)->type != C_REPL && (sp + 1)->type != C_REPLV) + mawk_cast_to_REPL(MAWK, sp + 1); + + cp = (mawk_cell_t *) (sp + 2); + if (cp->type == C_ARR_REF_BT) { + sc.type = C_NOINIT; + mawk_array_find(MAWK, (mawk_array_t)cp->ptr, cp->d.idx_cell, &sc, 1); + } + else + mawk_cellcpy(MAWK, &sc, cp->ptr); + + if (sc.type < C_STRING) + mawk_cast1_to_str(MAWK, &sc); + + MAWK->repl_cnt = 0; + tc.ptr = (PTR) gsub(MAWK, sp->ptr, sp + 1, string(&sc)->str, string(&sc)->str, 1); + + if (MAWK->repl_cnt) { + tc.type = C_STRING; + mawk_bifunct_target_assign(MAWK, cp, &tc); + } + + /* cleanup */ + free_STRING(string(&sc)); + free_STRING(string(&tc)); + mawk_repl_destroy(MAWK, sp + 1); + + sp->type = C_NUM; + sp->d.dval = (mawk_num_t) MAWK->repl_cnt; + return sp; +} + + +mawk_cell_t *mawk_bi_call(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + int i, numargs; + SYMTAB *fs; + const char *fn; +/* mawk_cell_t *ret;*/ + + numargs = sp->type; 
+ sp -= numargs; + if (sp->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + fn = string(sp)->str; + + fs = mawk_find(MAWK, fn, 0); + + if ((fs == NULL) || (fs->type != ST_FUNCT)) { + /* does not exist or not a function */ + if (fs == NULL) + mawk_set_errno(MAWK, "1 object does not exist"); + else + mawk_set_errno(MAWK, "2 object is not a function"); + +#warning this should be some common code in execute.c? + for (i = 0; i < numargs; i++) { + mawk_cell_destroy(MAWK, &sp[i]); + sp[i].type = C_NOINIT; + sp[i].d.dval = MAWK_NUM_ZERO; + } + sp[numargs].type = C_NOINIT; + sp[numargs].d.dval = MAWK_NUM_ZERO; + + return sp; + } + + mawk_set_errno(MAWK, ""); +/* ret = mawk_call(MAWK, sp + numargs - 1, fs->stval.fbp, numargs - 1);*/ + + /* shift args down to replace first arg (function name) with the first + actual arg passed to the function */ + for (i = 0; i < numargs; i++) + sp[i] = sp[i+1]; + sp += numargs-1; + + /* perform the call */ + sp->type = C_NUM; + sp->d.dval = numargs - 1; + inc_sp(); + sp->type = C_REQ_CALL; + sp->ptr = fs->stval.fbp; + return sp; +} + +mawk_cell_t *mawk_bi_acall(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + int numargs; + SYMTAB *fs, *fa; + const char *fn; + mawk_cell_t *ret, idx; + + numargs = 1; + sp -= numargs; + + if (sp->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + fn = string(sp)->str; + + fs = mawk_find(MAWK, fn, 0); + + if (fs == NULL) { + /* does not exist */ + mawk_set_errno(MAWK, "1 object does not exist"); + sp++; + goto error; + } + + sp++; + if (sp->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + + fa = mawk_find(MAWK, string(sp)->str, 0); + if (fa == NULL) { + mawk_set_errno(MAWK, "3 argument array does not exist"); + goto error; + } + + if (fa->type != ST_ARRAY) { + mawk_set_errno(MAWK, "4 argument array is not an array"); + goto error; + } + + sp -= 2; + for (numargs = 1;; numargs++) { + inc_sp(); + idx.type = C_NUM; + idx.d.dval = numargs; + if (mawk_array_find(MAWK, fa->stval.array, &idx, sp, 0) == 0) + break; + } + + 
if (fs->type == ST_FUNCT) { + /* perform the call */ + mawk_set_errno(MAWK, ""); + inc_sp(); + sp->type = C_NUM; + sp->d.dval = numargs; + inc_sp(); + sp->type = C_REQ_CALL; + sp->ptr = fs->stval.fbp; + return sp; + } + else { + mawk_set_errno(MAWK, "5 object is a function"); + idx.type = C_NOINIT; + idx.d.dval = MAWK_NUM_ZERO; + ret = &idx; + } + + sp -= numargs; + sp--; +#warning TODO: cell destroy all the allocated but unused args? + return sp; + +error:; + mawk_cell_destroy(MAWK, sp); + sp--; + sp->type = C_NOINIT; + return sp; +} + +mawk_cell_t *mawk_bi_valueof(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + int numargs; + SYMTAB *fs; + mawk_cell_t *ret, *idx; + + numargs = sp->type; + sp -= numargs; + + if (sp->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + + fs = mawk_find(MAWK, string(sp)->str, 0); + + if (fs == NULL) { + /* does not exist */ + mawk_set_errno(MAWK, "1 object does not exist"); + goto error; + } + + if (fs->type == ST_FUNCT) { + mawk_set_errno(MAWK, "5 object is a function"); + goto error; + } + + if (numargs > 1) { + if (fs->type == ST_ARRAY) { + idx = sp + 1; + mawk_array_find(MAWK, fs->stval.array, idx, sp, 0); + goto has_set_sp; + } + else { + mawk_set_errno(MAWK, "6 object is not an array, can not index it"); + goto error; + } + } + else { + if (fs->type == ST_ARRAY) { + mawk_set_errno(MAWK, "7 object is an array, need to index it"); + goto error; + } + ret = fs->stval.cp; + } + + if ((ret != NULL) && ((fs->type == ST_VAR) || (fs->type == ST_KEYWORD))) { + mawk_cell_destroy(MAWK, sp); + *sp = *ret; + has_set_sp:; + mawk_set_errno(MAWK, ""); + return sp; + } + + if (ret != NULL) + mawk_set_errno(MAWK, "8 object is not a variable (and not a function either)"); + +error:; + mawk_cell_destroy(MAWK, sp); + sp->type = C_NOINIT; + return sp; +} + +mawk_cell_t *mawk_bi_isnan(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + + sp->type = C_NUM; + sp->d.dval = P_isnan(sp->d.dval); + + return sp--; +} 
diff --git a/src/libmawk/bi_funct.h b/src/libmawk/bi_funct.h new file mode 100644 index 0000000..5ab795a --- /dev/null +++ b/src/libmawk/bi_funct.h @@ -0,0 +1,61 @@ + +/******************************************** +bi_funct.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#ifndef BI_FUNCT_H +#define BI_FUNCT_H 1 + +#include + +extern const BI_REC mawk_bi_funct[]; /* read-only */ + +/* builtin string functions */ +mawk_cell_t *mawk_bi_print(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_printf(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_length(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_index(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_substr(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_sprintf(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_split(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_match(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_getline(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_sub(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_gsub(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_toupper(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_tolower(mawk_state_t *, mawk_cell_t *); + +/* builtin arith functions */ +mawk_cell_t *mawk_bi_sin(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_cos(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_atan2(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_log(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_exp(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_int(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_sqrt(mawk_state_t *, mawk_cell_t *); 
+mawk_cell_t *mawk_bi_srand(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_rand(mawk_state_t *, mawk_cell_t *); + +/* other builtins */ +mawk_cell_t *mawk_bi_close(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_system(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_fflush(mawk_state_t *, mawk_cell_t *); + +/* libmawk extensions */ +mawk_cell_t *mawk_bi_call(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_acall(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_valueof(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_bi_isnan(mawk_state_t *, mawk_cell_t *); + +#endif /* BI_FUNCT_H */ diff --git a/src/libmawk/bi_funct_common.c b/src/libmawk/bi_funct_common.c new file mode 100644 index 0000000..76b3daa --- /dev/null +++ b/src/libmawk/bi_funct_common.c @@ -0,0 +1,89 @@ + +/******************************************** +bi_funct_common.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ +#include "mawk.h" +#include "bi_funct.h" +#include "bi_vars.h" + + +/* global for the disassembler */ +BI_REC const mawk_bi_funct[] = { /* info to load builtins */ + {"length", mawk_bi_length, 0, 1}, /* special must come first */ + {"index", mawk_bi_index, 2, 2}, + {"substr", mawk_bi_substr, 2, 3}, + {"sprintf", mawk_bi_sprintf, 1, 255}, +#ifndef MAWK_NO_FLOAT + {"sin", mawk_bi_sin, 1, 1}, + {"cos", mawk_bi_cos, 1, 1}, + {"atan2", mawk_bi_atan2, 2, 2}, + {"exp", mawk_bi_exp, 1, 1}, + {"log", mawk_bi_log, 1, 1}, +#endif + {"int", mawk_bi_int, 1, 1}, + {"sqrt", mawk_bi_sqrt, 1, 1}, + {"rand", mawk_bi_rand, 0, 0}, + {"srand", mawk_bi_srand, 0, 1}, + {"close", mawk_bi_close, 1, 1}, +#ifndef MAWK_NO_FORK + {"system", mawk_bi_system, 1, 1}, +#endif + {"toupper", mawk_bi_toupper, 1, 1}, + {"tolower", mawk_bi_tolower, 1, 1}, + {"fflush", mawk_bi_fflush, 0, 1}, + {"call", mawk_bi_call, 1, 255}, + {"acall", mawk_bi_acall, 2, 2}, + {"valueof", mawk_bi_valueof, 1, 2}, + {"isnan", mawk_bi_isnan, 1, 1}, + {(char *) 0, (PF_CP) 0, 0, 0} +}; + + +/* load built-in functions in symbol table */ +void mawk_bi_funct_init(mawk_state_t *MAWK) +{ + const register BI_REC *p; + register SYMTAB *stp; + + /* length is special (posix mawk_bozo) */ + stp = mawk_insert(MAWK, mawk_bi_funct->name); + stp->type = ST_LENGTH; + stp->stval.bip = mawk_bi_funct; + + for (p = mawk_bi_funct + 1; p->name; p++) { + stp = mawk_insert(MAWK, p->name); + stp->type = ST_BUILTIN; + stp->stval.bip = p; + } + + /* seed rand() off the clock */ + { + mawk_cell_t c; + + c.type = 0; + mawk_bi_srand(MAWK, &c); + } + +} + +#ifdef MAWK_MEM_PEDANTIC +void mawk_bi_funct_uninit(mawk_state_t *MAWK) +{ + const register BI_REC *p; + mawk_delete(MAWK, mawk_bi_funct->name, 0); + + for (p = mawk_bi_funct + 1; p->name; p++) + mawk_delete(MAWK, p->name, 0); +} +#endif diff --git a/src/libmawk/bi_funct_dummy.c b/src/libmawk/bi_funct_dummy.c new file mode 100644 index 0000000..3843176 --- 
/dev/null +++ b/src/libmawk/bi_funct_dummy.c @@ -0,0 +1,163 @@ +/******************************************** +bi_funct.c + +libmawk changes (C) 2009-2013, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ +#include "mawk.h" +#include "bi_funct.h" +#include "bi_vars.h" + + +mawk_cell_t *mawk_bi_length(mawk_state_t *MAWK, register mawk_cell_t *sp) +{ + abort(); +} + +char *mawk_str_str(register char *target, char *key, unsigned key_len) +{ + abort(); +} + + +mawk_cell_t *mawk_bi_index(mawk_state_t *MAWK, register mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_substr(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_match(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_toupper(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_tolower(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + +#ifndef MAWK_NO_FLOAT +mawk_cell_t *mawk_bi_sin(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_cos(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_atan2(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_log(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_exp(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} +#endif + +mawk_cell_t *mawk_bi_int(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_sqrt(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + + +mawk_cell_t *mawk_bi_srand(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + /* called from init */ + return NULL; +} + +mawk_cell_t *mawk_bi_rand(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + 
abort(); +} + +mawk_cell_t *mawk_bi_close(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + + +mawk_cell_t *mawk_bi_fflush(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_system(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + + +mawk_cell_t *mawk_bi_getline(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + + +mawk_cell_t *mawk_bi_sub(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_gsub(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + + +mawk_cell_t *mawk_bi_call(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_acall(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_valueof(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_split(mawk_state_t *MAWK, register mawk_cell_t *sp) +{ + abort(); +} + +mawk_cell_t *mawk_bi_isnan(mawk_state_t *MAWK, register mawk_cell_t *sp) +{ + abort(); +} + diff --git a/src/libmawk/bi_vars.c b/src/libmawk/bi_vars.c new file mode 100644 index 0000000..5fbec16 --- /dev/null +++ b/src/libmawk/bi_vars.c @@ -0,0 +1,100 @@ + +/******************************************** +bi_vars.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ +#include "mawk.h" +#include "symtype.h" +#include "bi_vars.h" +#include "field.h" +#include "init.h" +#include "memory.h" +#include "num.h" +#include "cell.h" +#include "array_environ.h" + +/* the order here must match the order in bi_vars.h */ + +static const char *bi_var_names[NUM_BI_VAR] = { + "NR", + "FNR", + "ARGC", + "FILENAME", + "OFS", + "ORS", + "RLENGTH", + "RSTART", + "SUBSEP", + "ERRNO", + "LIBPATH", +}; + +/* mawk_insert the builtin vars in the mawk_hash table */ +void mawk_bi_vars_init(mawk_state_t * MAWK) +{ + register int i; + register SYMTAB *s; + + + for (i = 0; i < NUM_BI_VAR; i++) { + s = mawk_insert(MAWK, bi_var_names[i]); + s->type = i <= 1 ? ST_NR : ST_VAR; + s->stval.cp = MAWK->bi_vars + i; + /* MAWK->bi_vars[i].type = 0 which is C_NOINIT */ + } + + mawk_environ_init(MAWK); + + /* set defaults */ + + FILENAME->type = C_STRING; + FILENAME->ptr = (PTR) mawk_new_STRING(MAWK, ""); + + OFS->type = C_STRING; + OFS->ptr = (PTR) mawk_new_STRING(MAWK, " "); + + ORS->type = C_STRING; + ORS->ptr = (PTR) mawk_new_STRING(MAWK, "\n"); + + SUBSEP->type = C_STRING; + SUBSEP->ptr = (PTR) mawk_new_STRING(MAWK, "\034"); + + NR->type = FNR->type = C_NUM; + /* dval is already 0.0 */ + + LIBPATH->type = C_STRING; + LIBPATH->ptr = (PTR) mawk_new_STRING(MAWK, ";~/.libmawk;/usr/local/lib/libmawk;/usr/lib/libmawk"); + +} + +#ifdef MAWK_MEM_PEDANTIC +void mawk_bi_vars_uninit(mawk_state_t * MAWK) +{ + int i; + + /* these ones should be cell-destroyed */ + mawk_delete(MAWK, "FILENAME", 1); + mawk_delete(MAWK, "OFS", 1); + mawk_delete(MAWK, "ORS", 1); + mawk_delete(MAWK, "SUBSEP", 1); + mawk_delete(MAWK, "LIBPATH", 1); + + for (i = 0; i < NUM_BI_VAR; i++) + mawk_delete(MAWK, bi_var_names[i], 0); + + /* this was not initialized as a bi_var, but it really is after initialization */ + mawk_delete(MAWK, "ARGV", 1); + + mawk_environ_uninit(MAWK); +} +#endif diff --git a/src/libmawk/bi_vars.h b/src/libmawk/bi_vars.h new file mode 
100644 index 0000000..3c53380 --- /dev/null +++ b/src/libmawk/bi_vars.h @@ -0,0 +1,45 @@ + +/******************************************** +bi_vars.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#ifndef BI_VARS_H +#define BI_VARS_H 1 + + +/* builtin variables NF, RS, FS, OFMT are stored + internally in field[], so side effects of assignment can + be handled +*/ + +/* NR and FNR must be next to each other */ +#define NR (MAWK->bi_vars+0) +#define FNR (MAWK->bi_vars+1) +#define ARGC (MAWK->bi_vars+2) +#define FILENAME (MAWK->bi_vars+3) +#define OFS (MAWK->bi_vars+4) +#define ORS (MAWK->bi_vars+5) +#define RLENGTH (MAWK->bi_vars+6) +#define RSTART (MAWK->bi_vars+7) +#define SUBSEP (MAWK->bi_vars+8) +#define ERRNO (MAWK->bi_vars+9) +#define LIBPATH (MAWK->bi_vars+10) + +#define NUM_BI_VAR 11 + +#ifdef MAWK_MEM_PEDANTIC +void mawk_bi_vars_uninit(mawk_state_t * m); +#endif + +#endif diff --git a/src/libmawk/cast.c b/src/libmawk/cast.c new file mode 100644 index 0000000..4f9361d --- /dev/null +++ b/src/libmawk/cast.c @@ -0,0 +1,341 @@ + +/******************************************** +cast.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ +#include +#include +#include "mawk.h" +#include "field.h" +#include "memory.h" +#include "scan.h" +#include "repl.h" +#include "num.h" +#include "cell.h" + + +void mawk_cast1_to_num(mawk_state_t *MAWK, register mawk_cell_t *cp) +{ + switch (cp->type) { + case C_NOINIT: + cp->d.dval = MAWK_NUM_ZERO; + break; + + case C_NUM: + return; + + case C_MBSTRN: + case C_STRING: + { + register mawk_string_t *s = (mawk_string_t *) cp->ptr; + +#if FPE_TRAPS_ON /* look for mawk_overflow error */ + errno = 0; + cp->d.dval = strtonum(s->str, (char **) 0); + if (errno && cp->d.dval != MAWK_NUM_ZERO) /* ignore underflow */ + mawk_rt_error("mawk_overflow converting %s to number", s->str); +#else + cp->d.dval = strtonum(s->str, (char **) 0); +#endif + free_STRING(s); + } + break; + + case C_STRNUM: + /* don't need to convert, but do need to free the mawk_string_t part */ + free_STRING(string(cp)); + break; + + + default: + mawk_bozo(MAWK, "mawk_cast on bad type"); + } + cp->type = C_NUM; +} + +void mawk_cast2_to_num(mawk_state_t *MAWK, register mawk_cell_t *cp) +{ + register mawk_string_t *s; + + switch (cp->type) { + case C_NOINIT: + cp->d.dval = MAWK_NUM_ZERO; + break; + + case C_NUM: + goto two; + case C_STRNUM: + free_STRING(string(cp)); + break; + + case C_MBSTRN: + case C_STRING: + s = (mawk_string_t *) cp->ptr; + +#if FPE_TRAPS_ON /* look for mawk_overflow error */ + errno = 0; + cp->d.dval = strtonum(s->str, (char **) 0); + if (errno && cp->d.dval != MAWK_NUM_ZERO) /* ignore underflow */ + mawk_rt_error("mawk_overflow converting %s to number", s->str); +#else + cp->d.dval = strtonum(s->str, (char **) 0); +#endif + free_STRING(s); + break; + + default: + mawk_bozo(MAWK, "mawk_cast on bad type"); + } + cp->type = C_NUM; + +two:cp++; + + switch (cp->type) { + case C_NOINIT: + cp->d.dval = MAWK_NUM_ZERO; + break; + + case C_NUM: + return; + case C_STRNUM: + free_STRING(string(cp)); + break; + + case C_MBSTRN: + case C_STRING: + s = 
(mawk_string_t *) cp->ptr; + +#if FPE_TRAPS_ON /* look for mawk_overflow error */ + errno = 0; + cp->d.dval = strtonum(s->str, (char **) 0); + if (errno && cp->d.dval != MAWK_NUM_ZERO) /* ignore underflow */ + mawk_rt_error("mawk_overflow converting %s to number", s->str); +#else + cp->d.dval = strtonum(s->str, (char **) 0); +#endif + free_STRING(s); + break; + + default: + mawk_bozo(MAWK, "mawk_cast on bad type"); + } + cp->type = C_NUM; +} + +void mawk_cast1_to_str(mawk_state_t *MAWK, register mawk_cell_t *cp) +{ + register Int lval; + char xbuff[260]; + + switch (cp->type) { + case C_NOINIT: + MAWK->null_str.ref_cnt++; + cp->ptr = (PTR) & MAWK->null_str; + break; + + case C_NUM: + + lval = mawk_d_to_I(cp->d.dval); + if (lval == cp->d.dval) + sprintf(xbuff, INT_FMT, lval); + else + sprintf(xbuff, string(MAWK_CONVFMT)->str, cp->d.dval); + + cp->ptr = (PTR) mawk_new_STRING(MAWK, xbuff); + break; + + case C_STRING: + return; + + case C_MBSTRN: + case C_STRNUM: + break; + + default: + mawk_bozo(MAWK, "bad type on mawk_cast"); + } + cp->type = C_STRING; +} + +void mawk_cast2_to_str(mawk_state_t *MAWK, register mawk_cell_t *cp) +{ + register Int lval; + char xbuff[260]; + + switch (cp->type) { + case C_NOINIT: + MAWK->null_str.ref_cnt++; + cp->ptr = (PTR) & (MAWK->null_str); + break; + + case C_NUM: + + lval = mawk_d_to_I(cp->d.dval); + if (lval == cp->d.dval) + sprintf(xbuff, INT_FMT, lval); + else + sprintf(xbuff, string(MAWK_CONVFMT)->str, cp->d.dval); + + cp->ptr = (PTR) mawk_new_STRING(MAWK, xbuff); + break; + + case C_STRING: + goto two; + + case C_MBSTRN: + case C_STRNUM: + break; + + default: + mawk_bozo(MAWK, "bad type on mawk_cast"); + } + cp->type = C_STRING; + +two: + cp++; + + switch (cp->type) { + case C_NOINIT: + MAWK->null_str.ref_cnt++; + cp->ptr = (PTR) & MAWK->null_str; + break; + + case C_NUM: + + lval = mawk_d_to_I(cp->d.dval); + if (lval == cp->d.dval) + sprintf(xbuff, INT_FMT, lval); + else + sprintf(xbuff, string(MAWK_CONVFMT)->str, cp->d.dval); 
+ + cp->ptr = (PTR) mawk_new_STRING(MAWK, xbuff); + break; + + case C_STRING: + return; + + case C_MBSTRN: + case C_STRNUM: + break; + + default: + mawk_bozo(MAWK, "bad type on mawk_cast"); + } + cp->type = C_STRING; +} + +void mawk_cast_to_RE(mawk_state_t *MAWK, register mawk_cell_t *cp) +{ + register PTR p; + + if (cp->type < C_STRING) + mawk_cast1_to_str(MAWK, cp); + + p = mawk_re_compile(MAWK, string(cp)); + free_STRING(string(cp)); + cp->type = C_RE; + cp->ptr = p; + +} + +void mawk_cast_for_split(mawk_state_t *MAWK, register mawk_cell_t *cp) +{ + static const char meta[] = "^$.*+?|[]()"; + char xbuff[] = "\\X"; + int c; + unsigned len; + + if (cp->type < C_STRING) + mawk_cast1_to_str(MAWK, cp); + + if ((len = string(cp)->len) == 1) { + if ((c = string(cp)->str[0]) == ' ') { + free_STRING(string(cp)); + cp->type = C_SPACE; + return; + } + else if (strchr(meta, c)) { + xbuff[1] = c; + free_STRING(string(cp)); + cp->ptr = (PTR) mawk_new_STRING(MAWK, xbuff); + } + } + else if (len == 0) { + free_STRING(string(cp)); + cp->type = C_SNULL; + return; + } + + mawk_cast_to_RE(MAWK, cp); +} + +/* input: cp-> a mawk_cell_t of type C_MBSTRN (maybe strnum) + mawk_test it -- mawk_casting it to the appropriate type + which is C_STRING or C_STRNUM +*/ + +void mawk_check_strnum(mawk_state_t *MAWK, mawk_cell_t *cp) +{ + char *mawk_test; + register unsigned char *s, *q; + + cp->type = C_STRING; /* assume not C_STRNUM */ + s = (unsigned char *) string(cp)->str; + q = s + string(cp)->len; + while (MAWK->scan_code[*s] == SC_SPACE) + s++; + if (s == q) + return; + + while (MAWK->scan_code[q[-1]] == SC_SPACE) + q--; + if (MAWK->scan_code[q[-1]] != SC_DIGIT && q[-1] != '.') + return; + + switch (MAWK->scan_code[*s]) { + case SC_DIGIT: + case SC_PLUS: + case SC_MINUS: + case SC_DOT: + +#if FPE_TRAPS_ON + errno = 0; + cp->d.dval = strtonum((char *) s, &mawk_test); + /* make mawk_overflow pure string */ + if (errno && cp->d.dval != MAWK_NUM_ZERO) + return; +#else + cp->d.dval = 
strtonum((char *) s, &mawk_test); +#endif + + if ((char *) q <= mawk_test) + cp->type = C_STRNUM; + /* <= instead of == , for some buggy strtod + e.g. Apple Unix */ + } +} + +/* mawk_cast a mawk_cell_t to a replacement cell */ + +void mawk_cast_to_REPL(mawk_state_t *MAWK, register mawk_cell_t *cp) +{ + register mawk_string_t *sval; + + if (cp->type < C_STRING) + mawk_cast1_to_str(MAWK, cp); + sval = (mawk_string_t *) cp->ptr; + + mawk_cellcpy(MAWK, cp, mawk_repl_compile(MAWK, sval)); + free_STRING(sval); +} + diff --git a/src/libmawk/cell.c b/src/libmawk/cell.c new file mode 100644 index 0000000..3538648 --- /dev/null +++ b/src/libmawk/cell.c @@ -0,0 +1,93 @@ +/******************************************** +libmawk changes (C) 2009-2014, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991-93, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +#include "mawk.h" +#include "repl.h" +#include "cell.h" + +char *mawk_bozo_cellcpy = "bad cell passed to mawk_cellcpy()"; + +/* does not assume target was a cell, if so + then caller should have made a previous + call to cell_destroy */ + +#ifdef CELLDEBUG +void DB_mawk_cellcpy(mawk_state_t *MAWK, register mawk_cell_t *target, const register mawk_cell_t *source) +{ + static int cnt = 0; + cell_paranoia_chk(MAWK, source); + switch (target->type = source->type) { + case C_NOINIT: + case C_SPACE: + case C_SNULL: + break; + + case C_NUM: + target->d.dval = source->d.dval; + break; + + case C_STRNUM: + target->d.dval = source->d.dval; + /* fall thru */ + + case C_REPL: + case C_MBSTRN: + case C_STRING: + string(source)->ref_cnt++; + /* fall thru */ + + case C_RE: + target->ptr = source->ptr; + break; + + case C_REPLV: + mawk_replv_cpy(MAWK, target, source); + break; + + default: + mawk_bozo(MAWK, "bad cell passed to mawk_cellcpy()"); + break; + } +} +#endif + +#ifdef CELLDEBUG +#include "zmalloc.h" +void DB_cell_destroy(mawk_state_t *MAWK, register mawk_cell_t *cp) /* HANGOVER time */ +{ + switch (cp->type) { + case C_NOINIT: + case C_NUM: + case C_ARR_REF: + break; + + case C_ARR_REF_BT: + mawk_bozo(MAWK, "ARR_REF_BT destroy"); + break; + + case C_MBSTRN: + case C_STRING: + case C_STRNUM: + if (--string(cp)->ref_cnt == 0) + mawk_zfree(MAWK, string(cp), string(cp)->len + STRING_OH); + break; + + case C_RE: + mawk_bozo(MAWK, "cell destroy called on RE cell"); + default: + mawk_bozo(MAWK, "cell destroy called on bad cell type"); + } + cell_destroy_paranoia_set(cp); +} + +#endif diff --git a/src/libmawk/cell.h b/src/libmawk/cell.h new file mode 100644 index 0000000..ee4870c --- /dev/null +++ b/src/libmawk/cell.h @@ -0,0 +1,73 @@ +#include "repl.h" + + +/* macro to test the type of two adjacent cells */ +#define TEST2(cp) (MAWK->mpow2[(cp)->type]+MAWK->mpow2[((cp)+1)->type]) + +/* paranoia cell checks for cell debug */ 
+#ifdef CELLDEBUG +#define cell_destroy_paranoia_set(cp) (cp)->type = C_FREED; +#define cell_paranoia_chk(MAWK, cp) \ +do { \ + if ((cp)->type == C_FREED) \ + mawk_bozo(MAWK, "invalid cell reference."); \ +} while (0) +#else +#define cell_destroy_paranoia_set(cp) +#define cell_paranoia_chk(MAWK, cp) +#endif + + +/* cell_destroy: macro or function, for debugging */ +#ifdef CELLDEBUG +#define mawk_cell_destroy(MAWK, cp) DB_cell_destroy(MAWK, cp) +#else +#define mawk_cell_destroy(MAWK, cp) \ +do { \ + if ( (cp)->type >= C_STRING && -- string(cp)->ref_cnt == 0) \ + mawk_zfree(MAWK, string(cp),string(cp)->len+STRING_OH); \ + cell_destroy_paranoia_set(cp); \ +} while(0) +#endif + +/* cellcpy: macro or function, for debugging */ +#ifdef CELLDEBUG +/* debug version: real function */ +void DB_mawk_cellcpy(mawk_state_t *MAWK, register mawk_cell_t *target, const register mawk_cell_t *source); +#define mawk_cellcpy DB_mawk_cellcpy +#else +/* fast version: macro */ +extern char *mawk_bozo_cellcpy; +#define mawk_cellcpy(MAWK_, target, source) \ +do { \ + const mawk_cell_t *cellcpy__source = (const mawk_cell_t *)(source); \ + mawk_cell_t *cellcpy__target = (target); \ + cell_paranoia_chk((MAWK_), cellcpy__source); \ + switch (cellcpy__target->type = cellcpy__source->type) { \ + case C_NOINIT: \ + case C_SPACE: \ + case C_SNULL: \ + break; \ + case C_NUM: \ + cellcpy__target->d.dval = cellcpy__source->d.dval; \ + break; \ + case C_STRNUM: \ + cellcpy__target->d.dval = cellcpy__source->d.dval; \ + case C_REPL: \ + case C_MBSTRN: \ + case C_STRING: \ + string(cellcpy__source)->ref_cnt++; \ + case C_RE: \ + cellcpy__target->ptr = cellcpy__source->ptr; \ + break; \ + case C_REPLV: \ + mawk_replv_cpy((MAWK_), cellcpy__target, cellcpy__source); \ + break; \ + default: \ + mawk_bozo(MAWK_, mawk_bozo_cellcpy); \ + break; \ + } \ +} while(0) +#endif + + diff --git a/src/libmawk/code.c b/src/libmawk/code.c new file mode 100644 index 0000000..62bbd66 --- /dev/null +++ b/src/libmawk/code.c @@ 
-0,0 +1,197 @@ + +/******************************************** +code.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991-93, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ +#include "mawk.h" +#include "code.h" +#include "init.h" +#include "jmp.h" +#include "field.h" + + +static CODEBLOCK *new_code(mawk_state_t *); + +/* grow the active code */ +void mawk_code_grow(mawk_state_t * MAWK) +{ + unsigned oldsize = mawk_code_limit - mawk_code_base; + unsigned newsize = PAGESZ + oldsize; + unsigned delta = mawk_code_ptr - mawk_code_base; + + if (mawk_code_ptr > mawk_code_limit) + mawk_bozo(MAWK, "CODEWARN is too small"); + + mawk_code_base = (INST *) + mawk_zrealloc(MAWK, mawk_code_base, INST_BYTES(oldsize), INST_BYTES(newsize)); + mawk_code_limit = mawk_code_base + newsize; + mawk_code_warn = mawk_code_limit - CODEWARN; + mawk_code_ptr = mawk_code_base + delta; +} + +void mawk_code_reset_size(mawk_state_t *MAWK, int newsize) +{ + unsigned oldsize = mawk_code_limit - mawk_code_base; + + mawk_bfree(MAWK, mawk_code_base, oldsize); + + mawk_code_base = (INST *)mawk_zmalloc(MAWK, INST_BYTES(newsize)); + mawk_code_limit = mawk_code_base + newsize; + mawk_code_warn = mawk_code_limit - CODEWARN; + mawk_code_ptr = mawk_code_base; +} + + +/* shrinks executable code that's done to its final size */ +INST *mawk_code_shrink(mawk_state_t *MAWK, CODEBLOCK *p, unsigned *sizep) +{ + + unsigned oldsize = INST_BYTES(p->limit - p->base); + unsigned newsize = INST_BYTES(p->ptr - p->base); + INST *retval; + + *sizep = newsize; + + retval = (INST *) mawk_zrealloc(MAWK, p->base, oldsize, newsize); + + /* free CODEBLOCK: only INST will be used as this block won't grow anymore */ + MAWK_ZFREE(MAWK, p); + 
return retval; +} + +/* code an op and a pointer in the active_code */ +void mawk_xcode2(mawk_state_t *MAWK, int op, PTR ptr) +{ + register INST *p = mawk_code_ptr + 2; + + if (p >= mawk_code_warn) { + mawk_code_grow(MAWK); + p = mawk_code_ptr + 2; + } + + p[-2].op = op; + p[-1].ptr = ptr; + mawk_code_ptr = p; +} + +/* code two ops in the active_code */ +void mawk_code2op(mawk_state_t *MAWK, int x, int y) +{ + register INST *p = mawk_code_ptr + 2; + + if (p >= mawk_code_warn) { + mawk_code_grow(MAWK); + p = mawk_code_ptr + 2; + } + + p[-2].op = x; + p[-1].op = y; + mawk_code_ptr = p; +} + +void mawk_code_init(mawk_state_t *MAWK) +{ + MAWK->main_code_p = new_code(MAWK); + + if (MAWK->main_code_p->ptr == NULL) + return; + + MAWK->active_code = *MAWK->main_code_p; + code1(_OMAIN); +} + +/* final code relocation + set_code() as in set concrete */ +void mawk_set_code(mawk_state_t * MAWK) +{ + /* set the main code which is active_code */ + if (MAWK->end_code_p || mawk_code_offset > 1) { + int gl_offset = mawk_code_offset; + + if (MAWK->NR_flag) + mawk_code2op(MAWK, OL_GL_NR, _HALT); + else + mawk_code2op(MAWK, OL_GL, _HALT); + + *MAWK->main_code_p = MAWK->active_code; + MAWK->main_start = mawk_code_shrink(MAWK, MAWK->main_code_p, &MAWK->main_size); + MAWK->next_label = MAWK->main_start + gl_offset; + MAWK->execution_start = MAWK->main_start; + } + else { /* only BEGIN */ + + mawk_zfree(MAWK, mawk_code_base, INST_BYTES(PAGESZ)); + MAWK_ZFREE(MAWK, MAWK->main_code_p); + MAWK->main_code_p = NULL; + } + + /* set the END code */ + if (MAWK->end_code_p) { + MAWK->active_code = *MAWK->end_code_p; + mawk_code2op(MAWK, _EXIT0, _HALT); + *MAWK->end_code_p = MAWK->active_code; + MAWK->end_start = mawk_code_shrink(MAWK, MAWK->end_code_p, &MAWK->end_size); + MAWK->end_start_orig = MAWK->end_start; + } + + /* set the BEGIN code */ + if (MAWK->begin_code_p) { + MAWK->active_code = *MAWK->begin_code_p; + if (MAWK->main_start) + mawk_code2op(MAWK, _JMAIN, _HALT); + else + 
mawk_code2op(MAWK, _EXIT0, _HALT); + *MAWK->begin_code_p = MAWK->active_code; + MAWK->begin_start = mawk_code_shrink(MAWK, MAWK->begin_code_p, &MAWK->begin_size); + + MAWK->execution_start = MAWK->begin_start; + } + + if (!MAWK->execution_start) { + /* program had functions but no pattern-action bodies */ + MAWK->execution_start = MAWK->begin_start = (INST *) mawk_zmalloc(MAWK, 2 * sizeof(INST)); + MAWK->execution_start[0].op = _EXIT0; + MAWK->execution_start[1].op = _HALT; + } +} + +static CODEBLOCK *new_code(mawk_state_t *MAWK) +{ + CODEBLOCK *p = MAWK_ZMALLOC(MAWK, CODEBLOCK); + + p->base = (INST *) mawk_zmalloc(MAWK, INST_BYTES(PAGESZ)); + p->limit = p->base + PAGESZ; + p->warn = p->limit - CODEWARN; + p->ptr = p->base; + + return p; +} + +/* mawk_begin/end_setup: switches the active_code from MAIN to a + BEGIN or END */ + +void mawk_be_setup(mawk_state_t *MAWK, int scope) +{ + *MAWK->main_code_p = MAWK->active_code; + + if (scope == SCOPE_BEGIN) { + if (!MAWK->begin_code_p) + MAWK->begin_code_p = new_code(MAWK); + MAWK->active_code = *MAWK->begin_code_p; + } + else { + if (!MAWK->end_code_p) + MAWK->end_code_p = new_code(MAWK); + MAWK->active_code = *MAWK->end_code_p; + } +} diff --git a/src/libmawk/code.h b/src/libmawk/code.h new file mode 100644 index 0000000..9437d49 --- /dev/null +++ b/src/libmawk/code.h @@ -0,0 +1,197 @@ + +/******************************************** +code.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +#ifndef CODE_H +#define CODE_H + +#include + +#define PAGESZ 512 + /* number of code instructions allocated at one time */ +#define CODEWARN 16 + +/* coding scope */ +#define SCOPE_MAIN 0 +#define SCOPE_BEGIN 1 +#define SCOPE_END 2 +#define SCOPE_FUNCT 3 + +#define mawk_code_ptr MAWK->active_code.ptr +#define mawk_code_base MAWK->active_code.base +#define mawk_code_warn MAWK->active_code.warn +#define mawk_code_limit MAWK->active_code.limit +#define mawk_code_offset (mawk_code_ptr-mawk_code_base) + +#define INST_BYTES(x) (sizeof(INST)*(unsigned)(x)) + +#define addloc \ + if ((MAWK->debug_symbols) && (MAWK->token_lineno != MAWK->last_token_lineno)) { \ + mawk_code_ptr++ -> op = LOCATION; \ + mawk_code_ptr++ -> op = MAWK->token_lineno; \ + MAWK->last_token_lineno = MAWK->token_lineno; \ + } \ + +#define code1(x) \ + do { \ + mawk_code_ptr++ -> op = (x); \ + } while(0) + +/* shutup picky compilers */ +#define code2(MAWK, x,p) \ + do { \ + addloc \ + mawk_xcode2(MAWK, x,(PTR)(p)); \ + } while(0) + +void mawk_xcode2(mawk_state_t *, int, PTR); +void mawk_code2op(mawk_state_t *, int, int); +INST *mawk_code_shrink(mawk_state_t *, CODEBLOCK *, unsigned *); +void mawk_code_reset_size(mawk_state_t *MAWK, int size); +void mawk_code_grow(mawk_state_t * MAWK); +void mawk_set_code(mawk_state_t *); +void mawk_be_setup(mawk_state_t *, int); +void mawk_dump_code(mawk_state_t *); /* dump code in a format dictated by MAKW->dump_code_flag */ +void mawk_dump_code_text(mawk_state_t *); +void mawk_dump_sym_text(mawk_state_t *); /* dump symbol table in text (in binary it's automatically included) */ + +const char *mawk_find_bi_name(PF_CP); +PF_CP mawk_find_bi_ptr(const char *name); + +/* the machine opcodes */ +/* to avoid confusion with a ptr FE_PUSHA must have op code 0 */ +/* unfortunately enums are less portable than defines */ + +#define FE_PUSHA 0 +#define FE_PUSHI 1 +#define F_PUSHA 2 +#define F_PUSHI 3 +#define NF_PUSHI 4 +#define 
_HALT 5 +#define _RANGE_STOP 6 +#define _PUSHC 7 +#define _PUSHD 8 +#define _PUSHS 9 +#define _PUSHINT 10 +#define _PUSHA 11 +#define _PUSHI 12 +#define L_PUSHA 13 +#define L_PUSHI 14 +#define AE_PUSHA 15 +#define AE_PUSHI 16 +#define A_PUSHA 17 +#define LAE_PUSHA 18 +#define LAE_PUSHI 19 +#define LA_PUSHA 20 +#define _POP 21 +#define _ADD 22 +#define _SUB 23 +#define _MUL 24 +#define _DIV 25 +#define _MOD 26 +#define _POW 27 +#define _NOT 28 +#define _TEST 29 +#define A_TEST 30 +#define A_DEL 31 +#define ALOOP 32 +#define A_CAT 33 +#define _UMINUS 34 +#define _UPLUS 35 +#define _ASSIGN 36 +#define _ADD_ASG 37 +#define _SUB_ASG 38 +#define _MUL_ASG 39 +#define _DIV_ASG 40 +#define _MOD_ASG 41 +#define _POW_ASG 42 +#define F_ASSIGN 43 +#define F_ADD_ASG 44 +#define F_SUB_ASG 45 +#define F_MUL_ASG 46 +#define F_DIV_ASG 47 +#define F_MOD_ASG 48 +#define F_POW_ASG 49 +#define _CAT 50 +#define _BUILTIN 51 +#define _PRINT 52 +#define _POST_INC 53 +#define _POST_DEC 54 +#define _PRE_INC 55 +#define _PRE_DEC 56 +#define F_POST_INC 57 +#define F_POST_DEC 58 +#define F_PRE_INC 59 +#define F_PRE_DEC 60 +#define _JMP 61 +#define _JNZ 62 +#define _JZ 63 +#define _LJZ 64 +#define _LJNZ 65 +#define _EQ 66 +#define _NEQ 67 +#define _LT 68 +#define _LTE 69 +#define _GT 70 +#define _GTE 71 +#define _MATCH0 72 +#define _MATCH1 73 +#define _MATCH2 74 +#define _EXIT 75 +#define _EXIT0 76 +#define _NEXT 77 +#define _RANGE_CHK 78 +#define _CALL 79 +#define _RET 80 +#define _RET0 81 +#define SET_ALOOP 82 +#define POP_AL 83 +#define OL_GL 84 +#define OL_GL_NR 85 +#define _OMAIN 86 +#define _JMAIN 87 +#define DEL_A 88 +#define LOCATION 89 +/* for catching array write: */ +#define _ASSIGN_ARR 90 +#define _ADD_ASG_ARR 91 +#define _SUB_ASG_ARR 92 +#define _MUL_ASG_ARR 93 +#define _DIV_ASG_ARR 94 +#define _MOD_ASG_ARR 95 +#define _POW_ASG_ARR 96 +#define _POST_INC_ARR 97 +#define _POST_DEC_ARR 98 +#define _PRE_INC_ARR 99 +#define _PRE_DEC_ARR 100 +#define AE_PUSHA_WRARR 101 +#define 
LAE_PUSHA_WRARR 102 + +/* these opcodes are used for binary dumps to hold references to different + linked pointers - assume op to be at least 32 bits unsigned (but pointers + are not narrower than that on any target platform). Each opcode is + a bitmask applied onto an index integer */ +#define DUMP_REPL 1 << 28 +#define DUMP_NUM 2 << 28 +#define DUMP_STR 3 << 28 +#define DUMP_VAR 4 << 28 +#define DUMP_FIELDIDX 5 << 28 +#define DUMP_CALL 6 << 28 +#define DUMP_SPECFIELD 7 << 28 +#define DUMP_RE 8 << 28 +#define DUMP_SPLIT_SPACE 9 << 28 +#define DUMP_SPLIT_NULL 10 << 28 + +#endif /* CODE_H */ diff --git a/src/libmawk/code_dump.c b/src/libmawk/code_dump.c new file mode 100644 index 0000000..43c58a2 --- /dev/null +++ b/src/libmawk/code_dump.c @@ -0,0 +1,31 @@ +/******************************************** +dump_code.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991-93, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/
+#include "mawk.h"
+#include "code.h"
+#include "init.h"
+#include "jmp.h"
+#include "field.h"
+#include "da_bin_helper.h"
+
+/* Dump the compiled code in the format selected by MAWK->dump_code_flag:
+   1 requests the text dump, 2 the binary dump (called with an empty string
+   as second argument). Any other value is reported as an internal error
+   and the process is aborted. */
+void mawk_dump_code(mawk_state_t * MAWK)
+{
+	if (MAWK->dump_code_flag == 1) {
+		mawk_dump_code_text(MAWK);
+	}
+	else if (MAWK->dump_code_flag == 2) {
+		mawk_print_code_bin(MAWK, "");
+	}
+	else {
+		/* no other dump format is known here */
+		mawk_errmsg(MAWK, -1, "libmawk internal error: unknown MAWK->dump_code_flag value %d\n", MAWK->dump_code_flag);
+		abort();
+	}
+}
diff --git a/src/libmawk/conf.h.in b/src/libmawk/conf.h.in
new file mode 100644
index 0000000..deb385a
--- /dev/null
+++ b/src/libmawk/conf.h.in
@@ -0,0 +1,75 @@
+put /tmpasm/IFS {\n}
+put /tmpasm/OFS {\n}
+
+put /local/includes ?sys/types/size_t/includes
+gsub /local/includes {\\\\n *} {\n}
+uniq /local/includes
+
+put /tmpasm/OFS {}
+put /tmpasm/IFS {}
+
+print [@
+/* Autogenerated by ./configure from conf.h.in - DO NOT EDIT */
+#ifndef LIBMAWK_CONF_H
+#define LIBMAWK_CONF_H
+
+#define LMAWK_VER "@/local/version/1@.@/local/version/2@.@/local/version/3@"
+
+#define _XOPEN_SOURCE 600
+
+@/local/includes@
+
+@]
+
+switch /local/numeric
+case {int} print [@
+/* numeric is int, do not use floating point */
+#define MAWK_NO_FLOAT
+@]
+end
+
+case {double} print [@
+/* numeric is double, use floating point */
+#undef MAWK_NO_FLOAT
+@]
+end
+end
+
+print {\n\n/* Whether realpath is available */\n}
+print_ternary ?libs/fs/realpath/presents {#define mawk_realpath realpath} {#undef mawk_realpath}
+
+print {\n\n/* Whether pipe(2) is available */\n}
+print_ternary ?libs/io/pipe/presents {#undef MAWK_NO_PIPE} {#define MAWK_NO_PIPE 1}
+
+print [@
+/* === math_wrap */
+@]
+
+print {/* log() */\n}
+switch ?libs/math/cc/log/m_0/errno
+	case {0} print {#define P_MBROKEN_LOG_M_0\n}; end
+	default print {/* #define P_MBROKEN_LOG_M_0 */\n}; end
+end
+
+switch ?libs/math/cc/log/p_0/errno
+	case {0} print {#define P_MBROKEN_LOG_P_0\n}; end
+	default print {/* #define P_MBROKEN_LOG_P_0 */\n}; end
+end
+
+switch 
?libs/math/cc/log/p_1/errno
+	case {0} print {#define P_MBROKEN_LOG_P_1\n}; end
+	default print {/* #define P_MBROKEN_LOG_P_1 */\n}; end
+end
+
+print {\n\n/* Defined if NaN works as expected in all operations */\n}
+print_ternary ?libs/math/nanop/allok {#define MAWK_HAVE_SAFE_NAN} {/* #define MAWK_HAVE_SAFE_NAN */}
+
+print [@
+/* defined if all static global allocations such as builtin variables
+   should be freed. Pros: easier memory leak testing (no builtin leaks);
+   cons: zmalloc's uninit will throw them out in less cpu cycles.
+*/
+/* #define MAWK_MEM_PEDANTIC */
+#endif
+
+@]
diff --git a/src/libmawk/da_bin.c b/src/libmawk/da_bin.c
new file mode 100644
index 0000000..3c66034
--- /dev/null
+++ b/src/libmawk/da_bin.c
@@ -0,0 +1,938 @@
+
+/********************************************
+da_bin.c
+
+libmawk binary dump (C) 2012, Tibor 'Igor2' Palinkas;
+loosely based on mawk code coming with the below copyright:
+
+copyright 1991, Michael D. Brennan
+
+This is a source file for mawk, an implementation of
+the AWK programming language.
+
+Mawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/
+
+#include "mawk.h"
+
+#include "code.h"
+#include "bi_funct.h"
+#include "repl.h"
+#include "field.h"
+#include "num.h"
+#include "fin.h"
+#include "vars.h"
+#include "f2d.h"
+#include
+
+/* for load sanity checks declare large maximums; without these a malformed
+   binary may cause libmawk to allocate a lot of memory; the limits are
+   parenthesized so they stay a single operand inside larger expressions */
+#define MEG (1024*1024)
+#define BIN_CODE_MAXLEN (64*MEG)
+#define BIN_NAME_MAXLEN 1024
+#define BIN_MAXVARS (1024*1024)
+#define BIN_MAXSTRS BIN_MAXVARS
+
+#define BIN_FILEVER 1
+
+/* SAVE:
+   - code
+   - constant nums
+   - constant str
+   - function names and pointers (including builtins!)
+   (->repl is not saved; regex's are recompiled since regex compiler needs
+   to be in the binary for runtime purposes anyway - save them as string)
+*/
+
+/* strcmp() that accepts NULL: two NULLs compare equal (0), a NULL and a
+   non-NULL string compare different (1); otherwise the strcmp() result */
+static int safe_strcmp(const char *s1, const char *s2)
+{
+	if ((s1 == NULL) && (s2 == NULL))
+		return 0;
+	if ((s1 == NULL) || (s2 == NULL))
+		return 1;
+	return strcmp(s1, s2);
+}
+
+/* strlen() that accepts NULL; a NULL string has length 0 */
+static int safe_strlen(const char *s)
+{
+	if (s == NULL)
+		return 0;
+	return (int) strlen(s);
+}
+
+/* mawk_new_STRING() that accepts NULL; NULL is stored as the empty string */
+static mawk_string_t *safe_new_STRING(mawk_state_t *MAWK, const char *s)
+{
+	if (s == NULL)
+		s = "";
+	return mawk_new_STRING(MAWK, s);
+}
+
+/* link tables are slow linear lists for now; may replace with hashes later.
+   For each of the three tables (nums, strs, vars) the ...u field is the
+   number of slots used and the ...a field is the number of slots allocated */
+typedef struct {
+	int numsu, strsu, varsu;
+	int numsa, strsa, varsa;
+	mawk_num_t *nums;
+	const char **strs;
+	struct {
+		int vtype;
+		mawk_cell_t *cp;
+	} *vars;
+} link_t;
+
+
+/* release the three tables of l; l itself and the strings referenced from
+   l->strs are not freed. free(NULL) is a no-op so no NULL checks are needed */
+static void link_free(link_t *l)
+{
+	free(l->nums);
+	free(l->strs);
+	free(l->vars);
+}
+
+/* make sure table arr of l has room for one more item, growing it by 256
+   slots when it is full. The realloc() result is checked before the old
+   pointer is replaced: on allocation failure the process is aborted instead
+   of leaking the old block and writing through a NULL pointer. Wrapped in
+   do { } while(0) so that the macro is a single statement. */
+#define check_grow(l, arr, used, alloced) \
+	do { \
+		if ((l)->used >= (l)->alloced) { \
+			void *grown_ = realloc((l)->arr, sizeof(*(l)->arr) * ((l)->alloced + 256)); \
+			if (grown_ == NULL) \
+				abort(); \
+			(l)->arr = grown_; \
+			(l)->alloced += 256; \
+		} \
+	} while(0)
+
+/* return the index of *num in l->nums, appending it if no stored value
+   compares equal to it */
+static int save_num(link_t *l, mawk_num_t *num)
+{
+	int n;
+	for(n = 0; n < l->numsu; n++)
+		if (l->nums[n] == *num)
+			return n;
+
+	check_grow(l, nums, numsu, numsa);
+	l->nums[l->numsu] = *num;
+	return l->numsu++;
+}
+
+
+/* return the index of str in l->strs, appending the pointer if no equal
+   string (as per safe_strcmp()) is stored yet */
+static int save_str(link_t *l, const char *str)
+{
+	int n;
+	for(n = 0; n < l->strsu; n++)
+		if (safe_strcmp(l->strs[n], str) == 0) {
+			return n;
+		}
+	check_grow(l, strs, strsu, strsa);
+	l->strs[l->strsu] = str; /* no need to copy, strings are constant during dumping */
+	return l->strsu++;
+}
+
+/* return the index of the (cell pointer, vtype) pair in l->vars, appending
+   it if the same pair is not stored yet */
+static int save_var(link_t *l, mawk_cell_t *v, int vtype)
+{
+	int n;
+	for(n = 0; n < l->varsu; n++)
+		if ((l->vars[n].cp == v) && (l->vars[n].vtype == vtype))
+			return n;
+
+	check_grow(l, vars, varsu, varsa);
+	l->vars[l->varsu].vtype = vtype;
+	l->vars[l->varsu].cp = v;
+	return l->varsu++;
+}
+
+/* return a freshly zmalloc'd copy of number id of the link table, or NULL
+   if id is not a valid index */
+static mawk_num_t *load_num(mawk_state_t *MAWK, link_t *l, int id)
+{
+	mawk_num_t *copy = NULL;
+
+	if ((id >= 0) && (id < l->numsu)) {
+		copy = mawk_zmalloc(MAWK, sizeof(mawk_num_t));
+		*copy = l->nums[id];
+	}
+	return copy;
+}
+
+/* sentinel returned by load_str() for an invalid index; callers compare the
+   pointer, not the content */
+static const char *invalid_str = "load_str invalid";
+
+/* return string id of the link table or invalid_str if id is out of range */
+static const char *load_str(link_t *l, int id)
+{
+	if ((id >= 0) && (id < l->strsu))
+		return l->strs[id];
+	return invalid_str;
+}
+
+/* return the cell pointer of variable id of the link table or NULL if id is
+   out of range */
+static mawk_cell_t *load_var(link_t *l, int id)
+{
+	if ((id >= 0) && (id < l->varsu))
+		return l->vars[id].cp;
+	return NULL;
+}
+
+/* look up the builtin whose name is string id of the link table; returns
+   NULL if id is not a valid string index */
+static void *load_bifunc(link_t *l, int id)
+{
+	const char *bi_name = load_str(l, id);
+
+	if (bi_name != invalid_str)
+		return mawk_f2d(mawk_find_bi_ptr(bi_name));
+	return NULL;
+}
+
+/* return the function block registered under name, creating the symbol and
+   an empty block (no code) if the name is not known yet. Returns NULL if
+   name exists but is not a function. If add_to_list is non-zero, the
+   returned block is also passed to mawk_add_to_fdump_list(). */
+static FBLOCK *bin_new_funct(mawk_state_t *MAWK, const char *name, int add_to_list)
+{
+	FBLOCK *blk;
+	SYMTAB *sym;
+
+	sym = mawk_find(MAWK, name, 0);
+	if (sym == NULL) {
+		/* unknown name: second lookup with 1 as last argument, then set
+		   the symbol up as a function without code */
+		sym = mawk_find(MAWK, name, 1);
+		sym->type = ST_FUNCT ;
+		blk = (FBLOCK *) mawk_zmalloc(MAWK, sizeof(FBLOCK)) ;
+		sym->stval.fbp = blk;
+		blk->name = mawk_zstrclone(MAWK, name);
+		blk->code = (INST*) 0 ;
+	}
+	else {
+		if (sym->type != ST_FUNCT)
+			return NULL;
+		blk = sym->stval.fbp;
+	}
+
+	if (add_to_list)
+		mawk_add_to_fdump_list(MAWK, blk);
+
+	return blk;
+}
+
+/* resolve string id of the link table to a function block (created on
+   demand, not added to the fdump list); NULL on invalid id */
+static void *bin_load_func_ref(mawk_state_t *MAWK, link_t *l, int id)
+{
+	const char *fn_name = load_str(l, id);
+
+	if (fn_name != invalid_str)
+		return bin_new_funct(MAWK, fn_name, 0);
+	return NULL;
+}
+
+
+
+/* pack_*: replace the operand at p with a DUMP_* tag combined with an index
+   into the link table; they rely on l and MAWK of the enclosing function */
+#define pack_re(p, cp) p->op = DUMP_RE | save_str(l, mawk_re_uncompile(MAWK, cp->ptr))
+#define pack_match(p) p->op = DUMP_RE | save_str(l, mawk_re_uncompile(MAWK, p->ptr))
+#define pack_repl(p, r) p->op = DUMP_REPL | save_str(l, mawk_repl_uncompile(MAWK, r))
+#define pack_numptr(p) p->op = DUMP_NUM | save_num(l, (mawk_num_t *)p->ptr)
+#define pack_split_space(p) p->op = DUMP_SPLIT_SPACE
+#define pack_split_null(p) p->op = DUMP_SPLIT_NULL
+#define pack_str(p) \
+	do { 
\
+		mawk_string_t *sval = (mawk_string_t *) p->ptr; \
+		p->op = DUMP_STR | save_str(l, sval->str); \
+	} while(0)
+
+/* pack_var_ and its wrappers register the pointer found at p->ptr together
+   with symbol type t; the cp argument is accepted but not used */
+#define pack_var_(p, cp, t) p->op = DUMP_VAR | save_var(l, p->ptr, t)
+#define pack_var(p, cp) pack_var_(p, cp, ST_VAR)
+#define pack_field(p, cp) pack_var_(p, cp, ST_FIELD)
+#define pack_field_0(p) p->op = DUMP_SPECFIELD | 0
+#define pack_field_shadow(p) p->op = DUMP_SPECFIELD | 1
+#define pack_arr(p, cp) pack_var_(p, cp, ST_ARRAY)
+#define pack_index(p, cp) p->op = DUMP_FIELDIDX | mawk_field_addr_to_index(MAWK, cp)
+#define pack_bifunc(p, cp) p->op = DUMP_CALL | save_str(l, mawk_find_bi_name((PF_CP) mawk_d2f(p->ptr)))
+#define pack_call(p) p->op = DUMP_CALL | save_str(l, ((FBLOCK *) p->ptr)->name)
+
+
+/* check if op matches flag; return NULL (from the function using the macro)
+   if not or remove flag; arguments are parenthesized because flag is
+   normally a DUMP_* shift expression */
+#define chk_mask(op, flag) do { if (((op) & (flag)) != (flag)) return NULL; (op) &= ~(flag); } while(0)
+
+/* unpack_*: inverse of pack_*; verify the DUMP_* tag, look the index up in
+   the link table and store the resulting pointer at p->ptr. On a tag
+   mismatch or a failed lookup they return NULL from the enclosing function.
+   They rely on l and MAWK of the enclosing function. */
+#define unpack_re(p, cp) do { \
+	const char *s; \
+	chk_mask(p->op, DUMP_RE); \
+	s = load_str(l, p->op); \
+	if (s == invalid_str) \
+		return NULL; \
+	cp = mawk_zmalloc(MAWK, sizeof(mawk_cell_t)); \
+	p->ptr = cp; \
+	cp->type = C_RE; \
+	cp->ptr = mawk_re_compile(MAWK, mawk_new_STRING(MAWK, s)); \
+} while(0)
+
+#define unpack_match(p) do { \
+	const char *s; \
+	chk_mask(p->op, DUMP_RE); \
+	s = load_str(l, p->op); \
+	if (s == invalid_str) \
+		return NULL; \
+	p->ptr = mawk_re_compile(MAWK, mawk_new_STRING(MAWK, s)); \
+} while(0)
+
+/* the result of mawk_repl_compile() is stored directly at p->ptr; no cell
+   is allocated here (an allocation made at this point would never be
+   referenced again); the r argument is accepted but not used */
+#define unpack_repl(p, r) do { \
+	const char *s; \
+	chk_mask(p->op, DUMP_REPL); \
+	s = load_str(l, p->op); \
+	if (s == invalid_str) \
+		return NULL; \
+	p->ptr = mawk_repl_compile(MAWK, safe_new_STRING(MAWK, s)); \
+} while(0)
+
+#define unpack_numptr(p) do { chk_mask(p->op, DUMP_NUM); p->ptr = load_num(MAWK, l, p->op); if (p->ptr == NULL) return NULL; } while(0)
+#define unpack_split_space(p) do { mawk_cell_t *cp; chk_mask(p->op, DUMP_SPLIT_SPACE); cp = MAWK_ZMALLOC(MAWK, mawk_cell_t); p->ptr = cp; cp->type = C_SPACE; } while(0)
+#define unpack_split_null(p) do { mawk_cell_t *cp; chk_mask(p->op, DUMP_SPLIT_NULL); cp = MAWK_ZMALLOC(MAWK, mawk_cell_t); p->ptr = cp; cp->type = C_SNULL; } while(0)
+#define unpack_str(p) \
+	do { \
+		const char *s; \
+		chk_mask(p->op, DUMP_STR); \
+		s = load_str(l, p->op); \
+		if (s == invalid_str) \
+			return NULL; \
+		p->ptr = safe_new_STRING(MAWK, s); \
+	} while(0)
+#define unpack_var_(p, cp, t) do { chk_mask(p->op, DUMP_VAR); p->ptr = load_var(l, p->op); if (p->ptr == NULL) return NULL; } while (0)
+#define unpack_var(p, cp) unpack_var_(p, cp, ST_VAR)
+#define unpack_field(p, cp) unpack_var_(p, cp, ST_FIELD)
+#define unpack_field_0(p) do { chk_mask(p->op, DUMP_SPECFIELD); p->ptr = MAWK->field;} while (0)
+#define unpack_field_shadow(p) do { chk_mask(p->op, DUMP_SPECFIELD); p->ptr = &MAWK->fs_shadow; } while (0)
+#define unpack_arr(p, cp) unpack_var_(p, cp, ST_ARRAY)
+#define unpack_index(p, cp) do { chk_mask(p->op, DUMP_FIELDIDX); p->ptr = field_ptr(p->op); } while (0)
+#define unpack_bifunc(p, cp) do { chk_mask(p->op, DUMP_CALL); p->ptr = load_bifunc(l, p->op); if (p->ptr == NULL) return NULL; } while (0)
+#define unpack_call(p) do { chk_mask(p->op, DUMP_CALL); p->ptr = bin_load_func_ref(MAWK, l, p->op); if (p->ptr == NULL) return NULL; } while (0)
+
+/* evaluate macro pack_ or unpack_, depending on pack flag (a variable named
+   pack must be in scope where these are used) */
+#define link_re(p, cp) do { if (pack) pack_re(p, cp); else unpack_re(p, cp); } while (0)
+#define link_match(p) do { if (pack) pack_match(p); else unpack_match(p); } while (0)
+#define link_repl(p, r) do { if (pack) pack_repl(p, r); else unpack_repl(p, r); } while (0)
+#define link_numptr(p) do { if (pack) pack_numptr(p); else unpack_numptr(p); } while (0)
+#define link_split_space(p) do { if (pack) pack_split_space(p); else unpack_split_space(p); } while (0)
+#define link_split_null(p) do { if (pack) pack_split_null(p); else unpack_split_null(p); } while (0)
+#define link_str(p) do { if (pack) pack_str(p); else 
unpack_str(p); } while (0)
+#define link_var(p, cp) do { if (pack) pack_var(p, cp); else unpack_var(p, cp); } while (0)
+#define link_field(p, cp) do { if (pack) pack_field(p, cp); else unpack_field(p, cp); } while (0)
+#define link_field_0(p) do { if (pack) pack_field_0(p); else unpack_field_0(p); } while (0)
+#define link_field_shadow(p) do { if (pack) pack_field_shadow(p); else unpack_field_shadow(p); } while (0)
+#define link_arr(p, cp) do { if (pack) pack_arr(p, cp); else unpack_arr(p, cp); } while (0)
+#define link_index(p, cp) do { if (pack) pack_index(p, cp); else unpack_index(p, cp); } while (0)
+#define link_bifunc(p, cp) do { if (pack) pack_bifunc(p, cp); else unpack_bifunc(p, cp); } while (0)
+#define link_call(p) do { if (pack) pack_call(p); else unpack_call(p); } while (0)
+/* mawk_da_bin: walk the instructions of one code section beginning at start
+   and convert the operands that hold pointers. With pack non-zero the
+   pack_ macros replace them by a DUMP_ tag combined with a link table
+   index; with pack zero the unpack_ macros turn such tagged values back
+   into pointers. MAWK, l and pack are used implicitly by the link_ macros.
+   Returns the address right after the _HALT that ended the walk. In
+   unpack mode the macros may return NULL from this function when a tag
+   does not match or a link table lookup fails. */
+INST *mawk_da_bin(mawk_state_t *MAWK, INST *start, link_t *l, int pack)
+{
+	mawk_cell_t *cp;
+	register INST *p = start;
+
+/* fprintf(stderr, "-\n"); */
+	do {
+/* fprintf(stderr, "%d: %d\n", p-start, p->op);*/
+
+		switch (p++->op) { /* p now points at the first operand, if any */
+
+			case _PUSHC:
+				cp = (mawk_cell_t *) p->ptr;
+				if (pack) {
+					switch (cp->type) {
+						case C_RE:
+							link_re(p, cp);
+							break;
+						case C_REPL:
+						case C_REPLV:
+							link_repl(p, cp);
+							break;
+						case C_SPACE:
+							link_split_space(p);
+							break;
+						case C_SNULL:
+							link_split_null(p);
+							break;
+						default:
+							/* nothing else to save */
+							break;
+					}
+				}
+				else {
+					/* the exact split tags are tested before the RE and REPL bit tests */
+					if (p->op == DUMP_SPLIT_SPACE)
+						link_split_space(p);
+					else if (p->op == DUMP_SPLIT_NULL)
+						link_split_null(p);
+					else if ((p->op & DUMP_RE) == DUMP_RE)
+						link_re(p, cp);
+					else if ((p->op & DUMP_REPL) == DUMP_REPL)
+						link_repl(p, cp);
+				}
+				p++;
+				break;
+
+			case _PUSHD:
+				link_numptr(p);
+				p++;
+				break;
+			case _PUSHS:
+				link_str(p);
+				p++;
+				break;
+			case _MATCH0:
+			case _MATCH1:
+				link_match(p);
+				p++;
+				break;
+
+			case _PUSHA:
+				cp = (mawk_cell_t *) p->ptr;
+				link_var(p, cp);
+				p++;
+				break;
+
+			case _PUSHI:
+				cp = (mawk_cell_t *) p->ptr;
+				if (pack) {
+					/* MAWK->field and MAWK->fs_shadow get their own DUMP_SPECFIELD codes */
+					if (cp == MAWK->field)
+						link_field_0(p);
+					else if (cp == &MAWK->fs_shadow)
+						link_field_shadow(p);
+					else {
+						if (cp > MAWK_NF && cp <= LAST_PFIELD)
+							link_field(p, cp);
+						else
+							link_var(p, cp);
+					}
+				}
+				else {
+					if (p->op == (DUMP_SPECFIELD | 0))
+						link_field_0(p);
+					else if (p->op == (DUMP_SPECFIELD | 1))
+						link_field_shadow(p);
+					else
+						link_var(p, cp); /* fields are just variables */
+				}
+				p++;
+				break;
+
+			case L_PUSHA:
+			case L_PUSHI:
+			case LAE_PUSHI:
+			case LAE_PUSHA:
+			case LAE_PUSHA_WRARR:
+			case LA_PUSHA:
+				/* address is (p+1)->op */
+				p++;
+				break;
+
+			case F_PUSHI:
+				/* address is (p+1)->op */
+				p++;
+				p++;
+				break;
+
+
+			case F_PUSHA:
+				cp = (mawk_cell_t *) p->ptr;
+				if (pack) {
+					if (cp >= MAWK_NF && cp <= LAST_PFIELD)
+						link_field(p, cp);
+					else
+						link_index(p, cp);
+				}
+				else {
+					if ((p->op & DUMP_FIELDIDX) == DUMP_FIELDIDX)
+						link_index(p, cp);
+					else
+						link_field(p, cp);
+				}
+				p++;
+				break;
+
+
+			case AE_PUSHA:
+			case AE_PUSHA_WRARR:
+			case AE_PUSHI:
+			case A_PUSHA:
+				cp = (mawk_cell_t *) p->ptr;
+				link_arr(p, cp);
+				p++;
+				break;
+
+			case _PUSHINT:
+				/* integer operand is (p+1)->op */
+				p++;
+				break;
+
+			case _BUILTIN:
+			case _PRINT:
+				cp = (mawk_cell_t *) p->ptr;
+				link_bifunc(p, cp);
+				p++;
+				break;
+
+			case _JMP:
+			case _JNZ:
+			case _JZ:
+			case _LJZ:
+			case _LJNZ:
+			case SET_ALOOP:
+			case ALOOP:
+			case A_CAT:
+				/* address is (p+1)->op */
+				p++;
+				break;
+
+			case _CALL:
+				link_call(p);
+				p += 2; /* skips the linked operand and one more slot */
+				break;
+
+			case _HALT:
+				break;
+
+			case _RANGE_CHK:
+				p += 4; /* four operand slots follow, none of them is linked */
+				break;
+
+			case LOCATION:
+				p++;
+				break;
+
+			default:
+				/* simple code */
+				break;
+		}
+	} while (p->op != _HALT);
+	p++; /* step over the terminating _HALT */
+	return p;
+}
+/* mawk_fdump_bin: pack (pack flag 1) the code of every function found on
+   MAWK->fdump_list; the return value of mawk_da_bin() is not used */
+void mawk_fdump_bin(mawk_state_t * MAWK, link_t *l)
+{
+	register struct mawk_fdump *p, *q = MAWK->fdump_list;
+
+	while (q) {
+		p = q;
+		q = p->link;
+/* fprintf(stdout, "function %s\n", p->fbp->name); */
+		mawk_da_bin(MAWK, p->fbp->code, l, 1);
+	}
+}
+/* f_id is the 8 character identifier kept in the f_id field of the header */
+
+static const char *f_id = "libmawk\n";
+typedef struct header_s {
+	char f_id[8];
+	char numeric; /* see mawk_num_id in num_*.c */
+	
char filever; /* should be BIN_FILEVER */ + union { + short int s; + unsigned char c; + } byteorder; + char inst_size; + char num_size; + char pad[50]; /* reserved for future use - header size should be 64 bytes long */ +} header_t; + +#define chk_hdr_size(h) \ + if (sizeof(h) != 64) { \ + mawk_errmsg(MAWK, 0, "Internal libmawk error: da_bin: header struct size is not 64 bytes but %d bytes", sizeof(h)); \ + abort(); \ + } + +#ifndef MAWK_NO_COMP +static int link_bin_sect(mawk_state_t * MAWK, INST *b, link_t *l) +{ + INST *p; + + if (b != NULL) { + p = mawk_da_bin(MAWK, b, l, 1); + return p - b; + } + return 0; +} + +static int save_bin_sect(mawk_state_t * MAWK, void *fd, int (*write)(void *fd, const void *buff, size_t len), INST *b, int len) +{ + if (write(fd, &len, 4) != 4) + return -1; + if (b != NULL) { + int l; + l = len * sizeof(INST); + if (write(fd, b, l) != l) + return -1; + } + return 0; +} + +int mawk_save_code_bin_(mawk_state_t * MAWK, void *fd, int (*write)(void *fd, const void *buff, size_t len)) +{ + header_t h; + link_t l; + int n; + long int len, begin_len, end_len, main_len, num_func; + long int *flen; + struct mawk_fdump **fps; + struct mawk_fdump *fp; + + /* create header */ + chk_hdr_size(h); + memcpy(h.f_id, f_id, sizeof(h.f_id)); + h.numeric = mawk_num_id; + h.byteorder.s = 0x626F; /* bo in MSB */ + h.filever = BIN_FILEVER; + h.inst_size = sizeof(INST); + h.num_size = sizeof(mawk_num_t); + memset(h.pad, 0, sizeof(h.pad)); + write(fd, &h, sizeof(h)); + + /* prepare for linking */ + memset(&l, 0, sizeof(l)); + + /* count functions */ + num_func = 0; + for(fp = MAWK->fdump_list; fp != NULL; fp = fp->link) + num_func++; + if (num_func > 0) { + flen = malloc(sizeof(long int) * num_func); + fps = malloc(sizeof(struct mawk_fdump *) * num_func); + } + for(n = 0, fp = MAWK->fdump_list; fp != NULL; n++, fp = fp->link) { + fps[n] = fp; + } + + +/* fprintf(stderr, "Funcs: %d\n", num_func); */ + + begin_len = link_bin_sect(MAWK, MAWK->begin_start, &l); +/* 
fprintf(stderr, "BEGIN: total %d instructions\n", begin_len); */ + end_len = link_bin_sect(MAWK, MAWK->end_start, &l); +/* fprintf(stderr, "END: total %d instructions\n", end_len); */ + main_len = link_bin_sect(MAWK, MAWK->main_start, &l); +/* fprintf(stderr, "MAIN: total %d instructions\n", main_len); */ + + for(n = 0, fp = MAWK->fdump_list; fp != NULL; n++, fp = fp->link) + flen[n] = link_bin_sect(MAWK, fp->fbp->code, &l); + + /* L1: save nums */ + len = l.numsu; + write(fd, &len, 4); + write(fd, l.nums, sizeof(mawk_num_t) * l.numsu); + write(fd, &num_func, 4); + + /* L2: save strings */ + len = l.strsu; + write(fd, &len, 4); + for(n = 0; n < l.strsu; n++) { + len = safe_strlen(l.strs[n]); + write(fd, &len, 4); + if (len > 0) + write(fd, l.strs[n], len); + } + + /* L3: save vars */ + len = l.varsu; + write(fd, &len, 4); + for(n = 0; n < l.varsu; n++) { + const char *name; + SYMTAB *stp; + + name = mawk_reverse_find(MAWK, l.vars[n].vtype, &(l.vars[n].cp)); + if (name == mawk_reverse_uk) { + mawk_errmsg(MAWK, 0, "da_bin: Variable name lookup error"); + return -1; + } + stp = mawk_find(MAWK, name, 0); + + /* save types */ + write(fd, &(l.vars[n].vtype), 2); + write(fd, &(stp->type), 1); + + /* save name len and name*/ + len = strlen(name); + write(fd, &len, 4); + write(fd, name, len); + + /* save value len and content - empty for now */ + len = 0; + write(fd, &len, 4); + } + + /* L4: save begin/end/main sections */ + save_bin_sect(MAWK, fd, write, MAWK->begin_start, begin_len); + save_bin_sect(MAWK, fd, write, MAWK->end_start, end_len); + save_bin_sect(MAWK, fd, write, MAWK->main_start, main_len); + + + /* L5: save user functions */ + + for(n = num_func-1; n >= 0; n--) { + int len; + + fp = fps[n]; + len = strlen(fp->fbp->name); +/* fprintf(stderr, "da_bin function %s/%d len=%d\n", fp->fbp->name, len,flen[n]); */ + write(fd, &len, 4); + write(fd, fp->fbp->name, len); +/* fprintf(stderr, "NARGS=%d %d\n", fp->fbp->nargs, fp->fbp->typev[0]); */ + write(fd, 
&fp->fbp->nargs, 2); + if (fp->fbp->nargs > 0) + write(fd, fp->fbp->typev, fp->fbp->nargs); + save_bin_sect(MAWK, fd, write, fp->fbp->code, flen[n]); + } + if (num_func > 0) { + free(flen); + free(fps); + } + + link_free(&l); + return 0; +} +#endif + +#ifndef MAWK_NO_EXEC +static int bin_load_func(mawk_state_t *MAWK, void *fd, int (*read)(void *fd, void *buff, size_t len), FBLOCK *fbp) +{ + int len; + read(fd, &len, 4); + if (len > 0) { + if (len > BIN_CODE_MAXLEN) + return -1; + + fbp->code = mawk_zmalloc(MAWK, len * sizeof(INST)); + read(fd, fbp->code, len * sizeof(INST)); + } + return 0; +} + +static int bin_load_sect(mawk_state_t *MAWK, void *fd, int (*read)(void *fd, void *buff, size_t len), int scope) +{ + int len; + + read(fd, &len, 4); + if (len > 0) { + MAWK->scope = scope; + if ((scope == SCOPE_BEGIN) || (scope == SCOPE_END)) + mawk_be_setup(MAWK, MAWK->scope); + mawk_code_reset_size(MAWK, len * sizeof(INST)); + read(fd, MAWK->active_code.base, len * sizeof(INST)); + MAWK->active_code.ptr = MAWK->active_code.base + len; + switch(scope) { + case SCOPE_BEGIN: *MAWK->begin_code_p = MAWK->active_code; break; + case SCOPE_END: *MAWK->end_code_p = MAWK->active_code; break; + case SCOPE_MAIN: *MAWK->main_code_p = MAWK->active_code; break; + /* avoid using default so later when scope is enum compiler can give warnings on missing scopes */ + } + MAWK->active_code = *MAWK->main_code_p; + } + return 0; +} + +static int bin_link_sect(mawk_state_t *MAWK, link_t *l, INST *blk, const char *sectname) +{ + if (blk == NULL) + return 0; + if (mawk_da_bin(MAWK, blk, l, 0) == NULL) { + mawk_errmsg(MAWK, 0, "da_bin: Error: failed to link binary in section %s", sectname); + return 1; + } + return 0; +} + + +int mawk_load_code_bin_(mawk_state_t *MAWK, void *fd, int (*read)(void *fd, void *buff, size_t len)) +{ + SYMTAB *stp; + header_t h; + link_t l; + int n, num_func; + long int len; + int ret; + + memset(&l, 0, sizeof(l)); + + /* read header */ + chk_hdr_size(h); + + read(fd, 
&h, sizeof(h)); + + ret = 0; + if (memcmp(h.f_id, f_id, sizeof(h.f_id)) != 0) { + mawk_errmsg(MAWK, 0, "da_bin: header ID mismatch"); + ret = MAWK_EHDRSIZE; + goto err; + } + if (h.numeric != mawk_num_id) { + mawk_errmsg(MAWK, 0, "da_bin: magic mismatch"); + ret = MAWK_EFILEMAGIC; + goto err; + } + if (h.byteorder.s != 0x626F) { /* bo in MSB */ + mawk_errmsg(MAWK, 0, "da_bin: byte order mismatch"); + ret = MAWK_EBYTEORDER; + goto err; + } + if (h.filever != BIN_FILEVER) { + mawk_errmsg(MAWK, 0, "da_bin: file version mismatch: produced by %d, can load %d", h.filever, BIN_FILEVER); + ret = MAWK_EVERSION; + goto err; + } + if (h.inst_size != sizeof(INST)) { + mawk_errmsg(MAWK, 0, "da_bin: instruction size mismatch: expected %d, got %d", sizeof(INST), h.inst_size); + ret = MAWK_EINSTSIZE; + goto err; + } + if (h.num_size != sizeof(mawk_num_t)) { + mawk_errmsg(MAWK, 0, "da_bin: numeric size mismatch: expected %d, got %d", sizeof(mawk_num_t), h.num_size); + ret = MAWK_ENUMSIZE; + goto err; + } + + /* prepare for linking */ + memset(&l, 0, sizeof(l)); + + + /* L1: load nums */ + read(fd, &len, 4); + l.numsa = l.numsu = len; + len = sizeof(*l.nums) * l.numsa; + if (len > BIN_MAXVARS) + goto err; + l.nums = malloc(len); + read(fd, l.nums, len); + read(fd, &num_func, 4); + + /* L2: load strings */ + read(fd, &len, 4); + l.strsa = l.strsu = len; + len = sizeof(*l.strs) * l.strsa; + if (len > BIN_MAXSTRS) + goto err; + l.strs = malloc(len); + for(n = 0; n < l.strsu; n++) { + char *name; + + read(fd, &len, 4); + if (len > BIN_NAME_MAXLEN) + goto err; + if (len > 0) { + l.strs[n] = mawk_zmalloc(MAWK, len+1); + if (l.strs[n] == NULL) { + ret = MAWK_EALLOC; + goto err; + } + read(fd, (char *)l.strs[n], len); + name = (char *)l.strs[n]; + name[len] = '\0'; + } + else + l.strs[n] = NULL; +/* fprintf(stderr, "LOAD str: [%d] '%s'\n", n, l.strs[n]); */ + } + + /* L3: load vars */ + read(fd, &len, 4); + l.varsa = l.varsu = len; + len = sizeof(*l.vars) * l.varsa; + if (l.varsa > 
BIN_MAXVARS) + goto err; + l.vars = malloc(len); +/* fprintf(stderr, "numvars: %d allocated=%d\n", l.varsu, len); */ + for(n = 0; n < l.varsu; n++) { + char *name; + short type; + char stype; + /* load type */ + read(fd, &type, 2); + read(fd, &stype, 1); + + /* load name len and name*/ + read(fd, &len, 4); + if (len > BIN_NAME_MAXLEN) + goto err; + name = mawk_zmalloc(MAWK, len+1); + read(fd, name, len); + name[len] = '\0'; + + /* go for existing vars first (because ARGV and builtins) */ + l.vars[n].cp = mawk_get_var(MAWK, name); + + if (l.vars[n].cp == NULL) { + if (type == ST_ARRAY) { + SYMTAB *stp; + + stp = mawk_insert(MAWK, name); + stp->stval.array = mawk_array_new(MAWK, NULL); + stp->type = ST_ARRAY; + l.vars[n].cp = (mawk_cell_t *)stp->stval.array; + } + else { + l.vars[n].cp = mawk_create_var(MAWK, name, NULL); + if (l.vars[n].cp == NULL) { + mawk_errmsg(MAWK, 0, "da_bin: load error: can not create variable %s", name); + goto err; + } + } + } + l.vars[n].vtype = type; + stp = mawk_find(MAWK, name, 0); + if (stp->type != stype) { + mawk_errmsg(MAWK, 0, "da_bin warning: type of variable %s changed from %d to %d", name, stp->type, stype); + } + stp->type = stype; + +/* fprintf(stderr, "LOAD var name='%s' type=%d len=%d: %d -> %p\n", name, type, len, n, l.vars[n].cp); */ + /* load value len and content - empty for now */ + read(fd, &len, 4); + if (len != 0) { + ret = MAWK_EWRONGVAL; + goto err2; + } + } + + + /* L4: load begin/end/main sections */ + mawk_code_init(MAWK); +/* printf("D1: %p %p %p\n", MAWK->main_code_p, MAWK->begin_code_p, MAWK->end_code_p); */ + if (bin_load_sect(MAWK, fd, read, SCOPE_BEGIN)) + goto err; + if (bin_load_sect(MAWK, fd, read, SCOPE_END)) + goto err; + if (bin_load_sect(MAWK, fd, read, SCOPE_MAIN)) + goto err; + + /* L5: load functions */ + for(n = 0; n < num_func; n++) { + int len; + char *name; + FBLOCK *fbp; + + read(fd, &len, 4); +/* fprintf(stderr, "LOAD function len=%d\n", len); */ + if (len > BIN_NAME_MAXLEN) + goto err; + 
name = mawk_zmalloc(MAWK, len+1); + read(fd, name, len); + name[len] = '\0'; +/* fprintf(stderr, "LOAD function name=%s\n", name); */ + fbp = bin_new_funct(MAWK, name, 1); + + read(fd, &fbp->nargs, 2); +/* fprintf(stderr, "LOAD nargs=%d\n", fbp->nargs); */ + if (fbp->nargs > 0) { + fbp->typev = mawk_zmalloc(MAWK, fbp->nargs); + read(fd, fbp->typev, fbp->nargs); + } + else + fbp->typev = NULL; + + bin_load_func(MAWK, fd, read, fbp); + if (mawk_da_bin(MAWK, fbp->code, &l, 0) == NULL) { + mawk_errmsg(MAWK, 0, "Can't link function %s", name); + goto err2; + } + } + + mawk_set_code(MAWK); + + if (bin_link_sect(MAWK, &l, MAWK->begin_start, "BEGIN")) + goto err; + if (bin_link_sect(MAWK, &l, MAWK->end_start, "END")) + goto err; + if (bin_link_sect(MAWK, &l, MAWK->main_start, "main")) + goto err; + + goto out; + err2:; + err:; + out:; + + link_free(&l); + + return ret; +} +#endif + diff --git a/src/libmawk/da_bin.h b/src/libmawk/da_bin.h new file mode 100644 index 0000000..063a0a2 --- /dev/null +++ b/src/libmawk/da_bin.h @@ -0,0 +1,13 @@ +#ifndef DA_BIN_H +#define DA_BIN_H +#include "mawk.h" + +/* load a precompiled script, using the provided read() on fd. Read will take + place sequentially, typically in small chunks */ +int mawk_load_code_bin_(mawk_state_t *MAWK, void *fd, int (*read)(void *fd, void *buff, size_t len)); + +/* dump a precompiled script in binary form using the provided write() on fd. + Write will take place sequentially, in small chunks. */ +int mawk_save_code_bin_(mawk_state_t * MAWK, void *fd, int (*write)(void *fd, const void *buff, size_t len)); + +#endif diff --git a/src/libmawk/da_bin_helper.c b/src/libmawk/da_bin_helper.c new file mode 100644 index 0000000..9a8a944 --- /dev/null +++ b/src/libmawk/da_bin_helper.c @@ -0,0 +1,52 @@ +/******************************************** +libmawk (C) 2009-2014, Tibor 'Igor2' Palinkas; + +This is a source file for libmawk, an implementation of +the AWK programming language, fork of mawk. 
+
+Libmawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/
+
+/* NOTE(review): the four system header names were missing from this hunk
+   (bare "#include" lines). The names below are restored from the calls
+   used in this file (open/O_RDONLY, read, write, close, fsync) - confirm
+   against the upstream source. */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "da_bin.h"
+
+/* read callback handed to mawk_load_code_bin_(): fd points to an int file
+   descriptor; returns whatever read(2) returns */
+static int bin_read(void *fd, void *buff, size_t size)
+{
+	return read(*(int *)fd, buff, size);
+}
+
+/* Load a precompiled binary script from the file named fn.
+   Returns MAWK_ECANTOPEN if the file can not be opened, otherwise the
+   return value of mawk_load_code_bin_(). The file is closed before
+   returning. */
+int mawk_load_code_bin(mawk_state_t *MAWK, const char *fn)
+{
+	int ret, fd;
+
+	fd = open(fn, O_RDONLY);
+	if (fd < 0)
+		return MAWK_ECANTOPEN;
+
+	ret = mawk_load_code_bin_(MAWK, &fd, bin_read);
+
+	close(fd);
+	return ret;
+}
+
+/* write callback handed to mawk_save_code_bin_(): fd points to an int file
+   descriptor; returns whatever write(2) returns */
+static int bin_write(void *fd, const void *buff, size_t size)
+{
+	return write(*(int *)fd, buff, size);
+}
+
+/* Dump the precompiled script in binary form to stdout (file descriptor 1).
+   fn is not used by this function: output always goes to stdout.
+   Returns the return value of mawk_save_code_bin_(). */
+int mawk_print_code_bin(mawk_state_t *MAWK, const char *fn)
+{
+	int ret, fd;
+
+	(void)fn; /* unused; silences the unused-parameter warning */
+	fd = 1;
+	ret = mawk_save_code_bin_(MAWK, &fd, bin_write);
+	fsync(1);
+
+	return ret;
+}
+
diff --git a/src/libmawk/da_bin_helper.h b/src/libmawk/da_bin_helper.h
new file mode 100644
index 0000000..0ee213d
--- /dev/null
+++ b/src/libmawk/da_bin_helper.h
@@ -0,0 +1,11 @@
+#ifndef DA_BIN_HELPER_H /* must differ from da_bin.h's guard (DA_BIN_H) */
+#define DA_BIN_HELPER_H
+#include "mawk.h"
+
+/* Load precompiled binary script from a file */
+int mawk_load_code_bin(mawk_state_t *MAWK, const char *fn);
+
+/* Print precompiled binary from da to stdout */
+int mawk_print_code_bin(mawk_state_t *MAWK, const char *fn);
+
+#endif
diff --git a/src/libmawk/da_common.c b/src/libmawk/da_common.c
new file mode 100644
index 0000000..98820d6
--- /dev/null
+++ b/src/libmawk/da_common.c
@@ -0,0 +1,89 @@
+/********************************************
+da_common.c
+
+libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
+based on mawk code coming with the below copyright:
+
+copyright 1991, Michael D. Brennan
+
+This is a source file for mawk, an implementation of
+the AWK programming language.
+********************************************/ + +/* disassemble code: common for text and bin */ + +#include +#include "mawk.h" + + +#include "code.h" +#include "bi_funct.h" +#include "repl.h" +#include "field.h" +#include "num.h" + +static const struct { + PF_CP action; + char *name; +} special_cases[] = { /* read-only */ + {mawk_bi_split, "split"}, + {mawk_bi_match, "match"}, + {mawk_bi_getline, "getline"}, + {mawk_bi_sub, "sub"}, + {mawk_bi_gsub, "gsub"}, + {mawk_bi_print, "print"}, + {mawk_bi_printf, "printf"}, + {(PF_CP) 0, NULL} +}; + + +const char *mawk_find_bi_name(PF_CP p) +{ + const BI_REC *q; + int i; + + for (q = mawk_bi_funct; q->name; q++) { + if (q->fp == p) { + /* found */ + return q->name; + } + } + /* next check some special cases */ + for (i = 0; special_cases[i].action; i++) { + if (special_cases[i].action == p) + return special_cases[i].name; + } + + return "unknown builtin"; +} + +PF_CP mawk_find_bi_ptr(const char *name) +{ + const BI_REC *q; + int i; + + for (q = mawk_bi_funct; q->name; q++) { + if (strcmp(q->name, name) == 0) { + /* found */ + return q->fp; + } + } + /* next check some special cases */ + for (i = 0; special_cases[i].action; i++) { + if (strcmp(special_cases[i].name, name) == 0) + return special_cases[i].action; + } + + return NULL; +} + +void mawk_add_to_fdump_list(mawk_state_t *MAWK, FBLOCK *fbp) +{ + struct mawk_fdump *p = MAWK_ZMALLOC(MAWK, struct mawk_fdump); + p->fbp = fbp; + p->link = MAWK->fdump_list; + MAWK->fdump_list = p; +} diff --git a/src/libmawk/da_text.c b/src/libmawk/da_text.c new file mode 100644 index 0000000..5e1d0df --- /dev/null +++ b/src/libmawk/da_text.c @@ -0,0 +1,409 @@ + +/******************************************** +da.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. 
+
+Mawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/
+
+/* disassemble code */
+
+#include "mawk.h"
+/* NOTE(review): the header name was missing from this hunk (bare
+   "#include"); <stdio.h> is restored because FILE/fprintf/fflush are used
+   below - confirm against the upstream source. */
+#include <stdio.h>
+
+#include "code.h"
+#include "bi_funct.h"
+#include "repl.h"
+#include "field.h"
+#include "num.h"
+#include "f2d.h"
+
+/* opcodes that take no operand, with the mnemonic printed for each;
+   terminated by the _HALT entry */
+static const struct sc {
+	char op;
+	char *name;
+} simple_code[] = { /* read-only */
+	{_RANGE_STOP,"range_stop"},
+	{FE_PUSHA, "fe_pusha"},
+	{FE_PUSHI, "fe_pushi"},
+	{A_TEST, "a_test"},
+	{A_DEL, "a_del"},
+	{DEL_A, "del_a"},
+	{POP_AL, "pop_al"},
+	{_POP, "pop"},
+	{_ADD, "add"},
+	{_SUB, "sub"},
+	{_MUL, "mul"},
+	{_DIV, "div"},
+	{_MOD, "mod"},
+	{_POW, "pow"},
+	{_NOT, "not"},
+	{_UMINUS, "uminus"},
+	{_UPLUS, "uplus"},
+	{_TEST, "mawk_test"},
+	{_CAT, "cat"},
+	{_ASSIGN, "assign"},
+	{_ADD_ASG, "add_asg"},
+	{_SUB_ASG, "sub_asg"},
+	{_MUL_ASG, "mul_asg"},
+	{_DIV_ASG, "div_asg"},
+	{_MOD_ASG, "mod_asg"},
+	{_POW_ASG, "pow_asg"},
+	{NF_PUSHI, "nf_pushi"},
+	{F_ASSIGN, "f_assign"},
+	{F_ADD_ASG, "f_add_asg"},
+	{F_SUB_ASG, "f_sub_asg"},
+	{F_MUL_ASG, "f_mul_asg"},
+	{F_DIV_ASG, "f_div_asg"},
+	{F_MOD_ASG, "f_mod_asg"},
+	{F_POW_ASG, "f_pow_asg"},
+	{_POST_INC, "post_inc"},
+	{_POST_DEC, "post_dec"},
+	{_PRE_INC, "pre_inc"},
+	{_PRE_DEC, "pre_dec"},
+	{F_POST_INC, "f_post_inc"},
+	{F_POST_DEC, "f_post_dec"},
+	{F_PRE_INC, "f_pre_inc"},
+	{F_PRE_DEC, "f_pre_dec"},
+	{_EQ, "eq"},
+	{_NEQ, "neq"},
+	{_LT, "lt"},
+	{_LTE, "lte"},
+	{_GT, "gt"},
+	{_GTE, "gte"},
+	{_MATCH2, "match2"},
+	{_EXIT, "exit"},
+	{_EXIT0, "exit0"},
+	{_NEXT, "next"},
+	{_RET, "ret"},
+	{_RET0, "ret0"},
+	{_OMAIN, "omain"},
+	{_JMAIN, "jmain"},
+	{OL_GL, "ol_gl"},
+	{OL_GL_NR, "ol_gl_nr"},
+	{_ASSIGN_ARR, "assign_arr"},
+	{_ADD_ASG_ARR, "add_asg_arr"},
+	{_SUB_ASG_ARR, "sub_asg_arr"},
+	{_MUL_ASG_ARR, "mul_asg_arr"},
+	{_DIV_ASG_ARR, "div_asg_arr"},
+	{_MOD_ASG_ARR, "mod_asg_arr"},
+	{_POW_ASG_ARR, "pow_asg_arr"},
+	{_POST_INC_ARR, "post_inc_arr"},
+	{_POST_DEC_ARR, "post_dec_arr"},
+	{_PRE_INC_ARR, "pre_inc_arr"},
+	{_PRE_DEC_ARR, "pre_dec_arr"},
+	{_HALT, NULL}
+};
+
+static const char *jfmt = "%s%s%03d\n"; /* read-only */
+	/* format to print jumps */
+static const char *tab2 = "\t\t"; /* read-only */
+
+/* Disassemble the code starting at start up to (not including) the first
+   _HALT and print it as text to fp_, which must be a FILE *.
+   Each output line begins with the instruction's offset relative to start
+   (the label); jump targets are printed as such offsets as well.
+   The stream is flushed before returning. */
+void mawk_da(mawk_state_t *MAWK, INST *start, void *fp_)
+{
+	mawk_cell_t *cp;
+	FILE *fp = fp_;
+	register INST *p = start;
+	const char *name;
+
+	while (p->op != _HALT) {
+		/* print the relative code address (label); the pointer
+		   difference is cast because %d expects an int */
+		fprintf(fp, "%03d ", (int)(p - start));
+
+		switch (p++->op) {
+
+		case _PUSHC:
+			cp = (mawk_cell_t *) p++->ptr;
+			switch (cp->type) {
+			case C_RE:
+				fprintf(fp, "pushc\t<<%p>>\t/%s/\n", cp->ptr, mawk_re_uncompile(MAWK, cp->ptr));
+				break;
+
+			case C_SPACE:
+				fprintf(fp, "pushc\tspace split\n");
+				break;
+
+			case C_SNULL:
+				fprintf(fp, "pushc\tnull split\n");
+				break;
+			case C_REPL:
+				fprintf(fp, "pushc\trepl\t%s\n", mawk_repl_uncompile(MAWK, cp));
+				break;
+			case C_REPLV:
+				fprintf(fp, "pushc\treplv\t%s\n", mawk_repl_uncompile(MAWK, cp));
+				break;
+
+			default:
+				fprintf(fp, "pushc\tWEIRD\n");
+				break;
+			}
+			break;
+
+		case _PUSHD:
+			fprintf(fp, "pushd\t" NUM_FMT_DA "\n", *(mawk_num_t *) p++->ptr);
+			break;
+		case _PUSHS:
+			{
+				mawk_string_t *sval = (mawk_string_t *) p++->ptr;
+				fprintf(fp, "pushs\t\"%s\"\n", sval->str);
+				break;
+			}
+
+		case _MATCH0:
+		case _MATCH1:
+			fprintf(fp, "match%d\t<<%p>>\t/%s/\n", p[-1].op == _MATCH1, p->ptr, mawk_re_uncompile(MAWK, p->ptr));
+			p++;
+			break;
+
+		case _PUSHA:
+			fprintf(fp, "pusha\t%s\n", mawk_reverse_find(MAWK, ST_VAR, &p++->ptr));
+			break;
+
+		case _PUSHI:
+			cp = (mawk_cell_t *) p++->ptr;
+			if (cp == MAWK->field)
+				fprintf(fp, "pushi\t$0\n");
+			else if (cp == &MAWK->fs_shadow)
+				fprintf(fp, "pushi\t@fs_shadow\n");
+			else {
+				if (cp > MAWK_NF && cp <= LAST_PFIELD)
+					name = mawk_reverse_find(MAWK, ST_FIELD, &cp);
+				else
+					name = mawk_reverse_find(MAWK, ST_VAR, &cp);
+
+				fprintf(fp, "pushi\t%s\n", name);
+			}
+			break;
+
+		case L_PUSHA:
+			fprintf(fp, "l_pusha\t%ld\n", (long)p++->op);
+			break;
+
+		case L_PUSHI:
+			fprintf(fp, "l_pushi\t%ld\n", (long)p++->op);
+			break;
+
+		case LAE_PUSHI:
+			fprintf(fp, "lae_pushi\t%ld\n", (long)p++->op);
+			break;
+
+		case LAE_PUSHA:
+			fprintf(fp, "lae_pusha\t%ld\n", (long)p++->op);
+			break;
+
+		case LAE_PUSHA_WRARR:
+			fprintf(fp, "lae_pusha_wr\t%ld\n", (long)p++->op);
+			break;
+
+		case LA_PUSHA:
+			fprintf(fp, "la_pusha\t%ld\n", (long)p++->op);
+			break;
+
+		case F_PUSHA:
+			cp = (mawk_cell_t *) p++->ptr;
+			if (cp >= MAWK_NF && cp <= LAST_PFIELD)
+				fprintf(fp, "f_pusha\t%s\n", mawk_reverse_find(MAWK, ST_FIELD, &cp));
+			else
+				fprintf(fp, "f_pusha\t$%d\n", mawk_field_addr_to_index(MAWK, cp));
+			break;
+
+		case F_PUSHI:
+			p++;
+			fprintf(fp, "f_pushi\t$%ld\n", (long)p++->op);
+			break;
+
+		case AE_PUSHA:
+			fprintf(fp, "ae_pusha\t%s\n", mawk_reverse_find(MAWK, ST_ARRAY, &p++->ptr));
+			break;
+
+		case AE_PUSHA_WRARR:
+			fprintf(fp, "ae_pusha_wr\t%s\n", mawk_reverse_find(MAWK, ST_ARRAY, &p++->ptr));
+			break;
+
+		case AE_PUSHI:
+			fprintf(fp, "ae_pushi\t%s\n", mawk_reverse_find(MAWK, ST_ARRAY, &p++->ptr));
+			break;
+
+		case A_PUSHA:
+			fprintf(fp, "a_pusha\t%s\n", mawk_reverse_find(MAWK, ST_ARRAY, &p++->ptr));
+			break;
+
+		case _PUSHINT:
+			fprintf(fp, "pushint\t%ld\n", (long)p++->op);
+			break;
+
+		case _BUILTIN:
+			fprintf(fp, "%s\n", mawk_find_bi_name((PF_CP) mawk_d2f(p++->ptr)));
+			break;
+
+		case _PRINT:
+			fprintf(fp, "%s\n", (PF_CP) mawk_d2f(p++->ptr) == mawk_bi_printf ? "printf" : "print");
+			break;
+
+		/* jfmt prints the target with %03d, so the computed target
+		   offset is cast to int in each jump case below */
+		case _JMP:
+			fprintf(fp, jfmt, "jmp", tab2, (int)((p - start) + p->op));
+			p++;
+			break;
+
+		case _JNZ:
+			fprintf(fp, jfmt, "jnz", tab2, (int)((p - start) + p->op));
+			p++;
+			break;
+
+		case _JZ:
+			fprintf(fp, jfmt, "jz", tab2, (int)((p - start) + p->op));
+			p++;
+			break;
+
+		case _LJZ:
+			fprintf(fp, jfmt, "ljz", tab2, (int)((p - start) + p->op));
+			p++;
+			break;
+
+		case _LJNZ:
+			/* tab2 + 1: a single tab for the longer mnemonic */
+			fprintf(fp, jfmt, "ljnz", tab2 + 1, (int)((p - start) + p->op));
+			p++;
+			break;
+
+		case SET_ALOOP:
+			fprintf(fp, "set_al\t%03d\n", (int)(p + p->op - start));
+			p++;
+			break;
+
+		case ALOOP:
+			fprintf(fp, "aloop\t%03ld\n", (long)(p - start + p->op));
+			p++;
+			break;
+
+		case A_CAT:
+			fprintf(fp, "a_cat\t%ld\n", (long)p++->op);
+			break;
+
+		case _CALL:
+			fprintf(fp, "call\t%s\t%ld\n", ((FBLOCK *) p->ptr)->name, (long)p[1].op);
+			p += 2;
+			break;
+
+		case _RANGE_CHK:
+			fprintf(fp, "range\t%03ld %03ld %03ld\n",
+				/* label for pat2, action, follow */
+				(long)(p - start + p[1].op), (long)(p - start + p[2].op), (long)(p - start + p[3].op));
+			p += 4;
+			break;
+
+		case LOCATION:
+			fprintf(fp, "(location)\t%ld\n", (long)p[0].op);
+			p++;
+			break;
+
+		default:
+			{
+				/* operand-less instruction: look the mnemonic up
+				   in simple_code[] */
+				const struct sc *q = simple_code;
+				int k = (p - 1)->op;
+
+				while (q->op != _HALT && q->op != k)
+					q++;
+
+				if (q->op != _HALT)
+					fprintf(fp, "%s\n", q->name);
+				else
+					fprintf(fp, "bad instruction %d\n", k);
+			}
+			break;
+		}
+	}
+	fflush(fp);
+}
+
+/* Disassemble every function on MAWK->fdump_list to stdout, each preceded
+   by a "function <name>" line and separated by an empty line.
+   Returns the number of functions dumped. */
+int mawk_fdump(mawk_state_t * MAWK)
+{
+	register struct mawk_fdump *p, *q = MAWK->fdump_list;
+	int dumped = 0;
+
+	while (q) {
+		p = q;
+		q = p->link;
+		if (dumped)
+			fprintf(stdout, "\n");
+		fprintf(stdout, "function %s\n", p->fbp->name);
+		mawk_da(MAWK, p->fbp->code, stdout);
+		dumped++;
+	}
+	return dumped;
+}
+
+/* Print the symbols found in MAWK->hash_table to stdout, one per line.
+   Variables, arrays and local symbols are always printed; the other
+   symbol types are printed only if verbose is non-zero. */
+static void mawk_symdump(mawk_state_t * MAWK, int verbose)
+{
+	HASHNODE *p;
+	int i;
+	for (i = 0; i < HASH_PRIME; i++) {
+		p = MAWK->hash_table[i];
+		while (p) {
+			switch (p->symtab.type) {
+			case ST_NONE:
+				break;
+			case ST_VAR: fprintf(stdout, "%s var\n", p->symtab.name); break;
+			case ST_KEYWORD: if (verbose) fprintf(stdout, "%s keyword\n", p->symtab.name); break;
+			case ST_BUILTIN: if (verbose) fprintf(stdout, "%s builtin\n", p->symtab.name); break;
+			case ST_ARRAY: fprintf(stdout, "%s array\n", p->symtab.name); break;
+			case ST_FIELD: if (verbose) fprintf(stdout, "%s field\n", p->symtab.name); break;
+			case ST_FUNCT: if (verbose) fprintf(stdout, "%s funct\n", p->symtab.name); break;
+			case ST_NR: if (verbose) fprintf(stdout, "%s \n", p->symtab.name); break;
+			case ST_LENGTH: if (verbose) fprintf(stdout, "%s \n", p->symtab.name); break;
+			case ST_LOCAL_NONE:
+			case ST_LOCAL_VAR:
+			case ST_LOCAL_ARRAY:
+				fprintf(stdout, "%s ?local?\n", p->symtab.name); break;
+			case ST_C_FUNCTION: if (verbose) fprintf(stdout, "%s C-function\n", p->symtab.name); break;
+			}
+			p = p->link;
+		}
+	}
+}
+
+/* Disassemble the whole program to stdout: user functions first, then the
+   BEGIN, MAIN and END sections (each only if present), separated by empty
+   lines. */
+void mawk_dump_code_text(mawk_state_t * MAWK)
+{
+	int fdmp;
+
+	/* dumps all user functions */
+	fdmp = mawk_fdump(MAWK);
+
+	if (MAWK->begin_start) {
+		if (fdmp)
+			fprintf(stdout, "\n");
+		fprintf(stdout, "BEGIN\n");
+		mawk_da(MAWK, MAWK->begin_start, stdout);
+		fdmp++;
+	}
+
+	if (MAWK->main_start) {
+		if (fdmp)
+			fprintf(stdout, "\n");
+		fprintf(stdout, "MAIN\n");
+		mawk_da(MAWK, MAWK->main_start, stdout);
+		fdmp++;
+	}
+
+	if (MAWK->end_start) {
+		if (fdmp)
+			fprintf(stdout, "\n");
+		fprintf(stdout, "END\n");
+		mawk_da(MAWK, MAWK->end_start, stdout);
+		fdmp++;
+	}
+}
+
+/* Print the global symbols (non-verbose listing) to stdout */
+void mawk_dump_sym_text(mawk_state_t * MAWK)
+{
+	fprintf(stdout, "SYMBOLS (global)\n");
+	mawk_symdump(MAWK, 0);
+
+	(void)mawk_f2d(NULL); /* suppress compiler warning on unused func */
+}
diff --git a/src/libmawk/debug.c b/src/libmawk/debug.c
new file mode 100644
index 0000000..51b266b
--- /dev/null
+++ b/src/libmawk/debug.c
@@ -0,0 +1,62 @@
+/********************************************
+libmawk (C) 2009-2014, Tibor 'Igor2' Palinkas;
+
+This is a source file for libmawk, an implementation of
+the AWK programming language, fork of mawk.
+
+Libmawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/
+
+/* NOTE(review): the header name was missing from this hunk (a single bare
+   "#include"). <stdio.h> and <stdlib.h> together cover what this file
+   uses (printf, malloc, free) - confirm against the upstream source. */
+#include <stdio.h>
+#include <stdlib.h>
+#include "mawk.h"
+
+/* static mawk_state_t *mawk_debugging = NULL; */
+
+/* Intentionally empty: called on every location change so that a debugger
+   breakpoint can be placed on it */
+void mawk_breakpoint(mawk_state_t * MAWK)
+{
+
+}
+
+/* Record new_token_line as the current line in MAWK->token_lineno and
+   call mawk_breakpoint() */
+void mawk_location_change(mawk_state_t * MAWK, int new_token_line)
+{
+	MAWK->token_lineno = new_token_line;
+/*	mawk_debugging = MAWK; */
+	mawk_breakpoint(MAWK);
+}
+
+/* Push function block f on top of MAWK->debug_callstack.
+   If the frame can not be allocated, nothing is pushed; a later
+   mawk_debug_callstack_pop() then removes the caller's frame, so the
+   printed call stack is unreliable after an allocation failure. */
+void mawk_debug_callstack_push(mawk_state_t * MAWK, FBLOCK * f)
+{
+	mawk_debug_callstack_t *s;
+
+/*	printf("PUSH: %s\n", f->name); */
+	s = malloc(sizeof(mawk_debug_callstack_t));
+	if (s == NULL)
+		return; /* out of memory: do not dereference NULL */
+	s->f = f;
+	s->next = MAWK->debug_callstack;
+	MAWK->debug_callstack = s;
+}
+
+/* Remove and free the top frame of MAWK->debug_callstack; does nothing if
+   the stack is empty */
+void mawk_debug_callstack_pop(mawk_state_t * MAWK)
+{
+	mawk_debug_callstack_t *s = MAWK->debug_callstack;
+
+	if (s != NULL) {
+		MAWK->debug_callstack = MAWK->debug_callstack->next;
+		free(s);
+	}
+}
+
+/* Print the debug call stack to stdout, innermost frame first (#0) */
+void mawk_debug_where(mawk_state_t * MAWK)
+{
+	mawk_debug_callstack_t *s;
+	int n;
+
+	printf("mawk call stack:\n");
+	for (n = 0, s = MAWK->debug_callstack; s != NULL; s = s->next, n++) {
+		printf(" #%d %s\n", n, s->f->name);
+	}
+}
+
+/* NOTE(review): these are defined const here but declared as plain
+   "extern FBLOCK" in debug.h - the qualifiers should be made to agree
+   once it is confirmed how mawk.h declares them. */
+/* name code size nargs typev */
+const FBLOCK mawk_debug_main = { "MAIN", NULL, 0, 0, "" };
+const FBLOCK mawk_debug_begin = { "BEGIN", NULL, 0, 0, "" };
diff --git a/src/libmawk/debug.h b/src/libmawk/debug.h
new file mode 100644
index 0000000..cc830a9
--- /dev/null
+++ b/src/libmawk/debug.h
@@ -0,0 +1,8 @@
+#include "mawk.h"
+
+void mawk_location_change(mawk_state_t * MAWK, int new_token_line);
+void mawk_debug_callstack_pop(mawk_state_t * MAWK);
+void mawk_debug_callstack_push(mawk_state_t * MAWK, FBLOCK * f);
+
+extern FBLOCK mawk_debug_main;
+extern FBLOCK mawk_debug_begin;
diff --git a/src/libmawk/error.c b/src/libmawk/error.c
new file mode 100644
index 0000000..2eee157
--- /dev/null
+++ b/src/libmawk/error.c
@@ -0,0 +1,278 @@
+
+/********************************************
+error.c
+
+libmawk changes (C)
2009-2010, Tibor 'Igor2' Palinkas;
+based on mawk code coming with the below copyright:
+
+copyright 1991, 1992 Michael D. Brennan
+
+This is a source file for mawk, an implementation of
+the AWK programming language.
+
+Mawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/
+
+#include "conf.h"
+
+#warning this should be gone:
+/* NOTE(review): the header name was missing from this hunk (a single bare
+   "#include"). <stdio.h> and <string.h> together cover what this file
+   uses (fprintf/vfprintf/putc/stderr, strerror) - confirm against the
+   upstream source. */
+#include <stdio.h>
+#include <string.h>
+
+#include "mawk.h"
+#include "scan.h"
+#include "bi_vars.h"
+#include "vargs.h"
+#include "zmalloc.h"
+#include "memory.h"
+#include "cell.h"
+
+#ifndef EOF
+#define EOF (-1)
+#endif
+
+static void rt_where(mawk_state_t *);
+static char *type_to_str(mawk_state_t *, int);
+
+
+/* messages for mawk_strerror(), indexed by the negated error code;
+   terminated by NULL */
+static const char *strerrors[] = {
+	"success",
+	"can not open file",
+	"internal error: header size mismatch",
+	"bad file magic",
+	"wrong byte order",
+	"version not supported",
+	"wrong instruction size",
+	"wrong numeric size",
+	"can not allocate memory",
+	NULL
+};
+
+/* Return a static, human readable message for error code err.
+   Valid codes are 0 and negative values; a positive code, a code below
+   MAWK_Elast or a code that has no entry in strerrors[] yields an
+   "Unknown error" string. Never returns NULL. */
+const char *mawk_strerror(mawk_errno_t err)
+{
+	/* number of real messages: the table without its NULL terminator */
+	const int nmsg = (int)(sizeof(strerrors) / sizeof(strerrors[0])) - 1;
+
+	if (err > 0)
+		return "Unknown error (positive)";
+	if (err < MAWK_Elast)
+		return "Unknown error (negative)";
+	/* the table may be shorter than the error enum: never hand out the
+	   NULL terminator or read past the end of the array */
+	if (-err >= nmsg)
+		return "Unknown error (negative)";
+	return strerrors[-err];
+}
+
+#ifdef NO_VFPRINTF
+#define vfprintf simple_vfprintf
+#endif
+
+/* generic error message with a hook into the system error
+   messages if errnum > 0 */
+
+void mawk_errmsg VA_ALIST2(int, errnum, char *, format)
+{
+	va_list args;
+
+	fprintf(stderr, "%s: ", MAWK->progname);
+	VA_START2(args, int, errnum, char *, format);
+	vfprintf(stderr, format, args);
+	va_end(args);
+
+	if (errnum > 0)
+		fprintf(stderr, " (%s)", strerror(errnum));
+
+	fprintf(stderr, "\n");
+}
+
+/* Print a compile time error message (printf style format) to stderr,
+   prefixed with the program name, the program file name if set, and the
+   current token line. Calls mawk_exit(MAWK, 2) once MAX_COMPILE_ERRORS
+   errors have been reported. */
+void mawk_compile_error VA_ALIST(const char *, format)
+{
+	va_list args;
+	const char *s0, *s1;
+
+	/* with multiple program files put program name in
+	   error message */
+	if (MAWK->ps.pfile_name) {
+		s0 = MAWK->ps.pfile_name;
+		s1 = ": ";
+	}
+	else {
+		s0 = s1 = "";
+	}
+
+	fprintf(stderr, "%s: %s%sline %u: ", MAWK->progname, s0, s1, MAWK->token_lineno);
+	VA_START(args, char *, format);
+	vfprintf(stderr, format, args);
+	va_end(args);
+	fprintf(stderr, "\n");
+	if (++(MAWK->compile_error_count) == MAX_COMPILE_ERRORS)
+		mawk_exit(MAWK, 2);
+}
+
+/* Print a run time error message (printf style format) to stderr followed
+   by the current input position, set MAWK->rt_exit_code to 2 and call
+   mawk_exit(MAWK, 2). */
+void mawk_rt_error VA_ALIST(const char *, format)
+{
+	va_list args;
+
+	fprintf(stderr, "%s: run time error: ", MAWK->progname);
+	VA_START(args, char *, format);
+	vfprintf(stderr, format, args);
+	va_end(args);
+	putc('\n', stderr);
+	rt_where(MAWK);
+	MAWK->rt_exit_code = 2;
+	mawk_exit(MAWK, 2);
+}
+
+
+/* Report an internal error s; aborts when built with CELLDEBUG or DEBUG,
+   otherwise calls mawk_exit(MAWK, 3) */
+void mawk_bozo(MAWK, s)
+	mawk_state_t *MAWK;
+	char *s;
+{
+	mawk_errmsg(MAWK, 0, "mawk_bozo: %s", s);
+#ifdef CELLDEBUG
+	abort();
+#endif
+#ifdef DEBUG
+	abort();
+#endif
+	mawk_exit(MAWK, 3);
+}
+
+/* Report that the program limit named s (of the given size) was exceeded
+   and call mawk_exit(MAWK, 2) */
+void mawk_overflow(mawk_state_t *MAWK, char *s, unsigned size)
+{
+	mawk_errmsg(MAWK, 0, "program limit exceeded: %s size=%u", s, size);
+	mawk_exit(MAWK, 2);
+}
+
+
+/* print as much as we know about where a rt error occurred */
+
+static void rt_where(mawk_state_t * MAWK)
+{
+	if (FILENAME->type != C_STRING)
+		mawk_cast1_to_str(MAWK, FILENAME);
+
+	fprintf(stderr, "\tFILENAME=\"%s\" FNR=%u NR=%u\n", string(FILENAME)->str, MAWK->rt_fnr, MAWK->rt_nr);
+}
+
+/* run time */
+void mawk_rt_overflow(mawk_state_t *MAWK, char *s, unsigned size)
+{
+	mawk_errmsg(MAWK, 0, "program limit exceeded: %s size=%u", s, size);
+	rt_where(MAWK);
+	mawk_exit(MAWK, 2);
+}
+
+/* Return a static English name for symbol table type type; any type not
+   listed below is reported through mawk_bozo() */
+static char *type_to_str(mawk_state_t *MAWK, int type)
+{
+	/* initialized so that a defined string is returned even if
+	   mawk_bozo() returns to us */
+	char *retval = "unknown type";
+
+	switch (type) {
+	case ST_VAR:
+		retval = "variable";
+		break;
+	case ST_ARRAY:
+		retval = "array";
+		break;
+	case ST_FUNCT:
+		retval = "function";
+		break;
+	case ST_LOCAL_VAR:
+		retval = "local variable";
+		break;
+	case ST_LOCAL_ARRAY:
+		retval = "local array";
+		break;
+	default:
+		mawk_bozo(MAWK, "type_to_str");
+	}
+	return retval;
+}
+
+/* emit an error message about a type clash */
+void mawk_type_error(mawk_state_t *MAWK, SYMTAB *p)
+{
+	mawk_compile_error(MAWK, "illegal reference to %s %s", type_to_str(MAWK, p->type), p->name);
+}
+
+
+
+#ifdef NO_VFPRINTF
+
+/* a minimal vfprintf */
+/* Handles %c %d %o %x %u (optionally with 'l'), %s and, unless
+   MAWK_NO_FLOAT is defined, %g %f, each with an optional '-', width and
+   precision. Any other conversion is printed literally. Always returns 0.
+   NOTE(review): the conversion is copied into xbuff[64] without a length
+   check; this looks safe only for short, trusted format strings. */
+int simple_vfprintf(FILE *fp, char *format, va_list argp)
+{
+	char *q, *p, *t;
+	int l_flag;
+	char xbuff[64];
+
+	q = format;
+	xbuff[0] = '%';
+
+	while (*q != 0) {
+		if (*q != '%') {
+			putc(*q, fp);
+			q++;
+			continue;
+		}
+
+		/* mark the start with p */
+		p = ++q;
+		t = xbuff + 1;
+
+		if (*q == '-')
+			*t++ = *q++;
+		while (scan_code[*(unsigned char *) q] == SC_DIGIT)
+			*t++ = *q++;
+		if (*q == '.') {
+			*t++ = *q++;
+			while (scan_code[*(unsigned char *) q] == SC_DIGIT)
+				*t++ = *q++;
+		}
+
+		if (*q == 'l') {
+			l_flag = 1;
+			*t++ = *q++;
+		}
+		else
+			l_flag = 0;
+
+
+		*t = *q++;
+		t[1] = 0;
+
+		switch (*t) {
+		case 'c':
+		case 'd':
+		case 'o':
+		case 'x':
+		case 'u':
+			if (l_flag)
+				fprintf(fp, xbuff, va_arg(argp, long));
+			else
+				fprintf(fp, xbuff, va_arg(argp, int));
+			break;
+
+		case 's':
+			fprintf(fp, xbuff, va_arg(argp, char *));
+			break;
+
+#ifndef MAWK_NO_FLOAT
+		case 'g':
+		case 'f':
+			fprintf(fp, xbuff, va_arg(argp, double));
+			break;
+#endif
+
+		default:
+			/* unknown conversion: print the '%' and rescan the
+			   rest as plain text */
+			putc('%', fp);
+			q = p;
+			break;
+		}
+	}
+	return 0; /* shut up */
+}
+
+#endif /* NO_VFPRINTF */
+
+
+/* Replace the value of the ERRNO cell with a new string holding error */
+void mawk_set_errno(mawk_state_t * MAWK, const char *error)
+{
+	mawk_cell_destroy(MAWK, ERRNO);
+	ERRNO->type = C_STRING;
+	ERRNO->ptr = (PTR) mawk_new_STRING(MAWK, error);
+}
diff --git a/src/libmawk/examples/ct_length.awk b/src/libmawk/examples/ct_length.awk
new file mode 100755
index 0000000..424cff8
--- /dev/null
+++ b/src/libmawk/examples/ct_length.awk
@@ -0,0 +1,28 @@
+#!/usr/local/bin/mawk -f
+
+# ct_length.awk
+#
+# replaces all length
+# by length($0)
+#
+
+
+{
+
+	while ( i = index($0, "length") )
+	{
+		printf "%s" , substr($0,1, i+5) # ...length
+		$0 = substr($0,i+6)
+
+		if ( match($0, /^[ \t]*\(/) )
+		{
+			# its OK
+			printf "%s", substr($0, 1, RLENGTH)
+			$0 = substr($0, RLENGTH+1)
+		}
+		else # length alone
+			printf "($0)"
+
+	}
+	print
+}
diff --git a/src/libmawk/examples/decl.awk
b/src/libmawk/examples/decl.awk new file mode 100644 index 0000000..5460539 --- /dev/null +++ b/src/libmawk/examples/decl.awk @@ -0,0 +1,143 @@ + +# parse a C declaration by recursive descent +# based on a C program in KR ANSI edition +# +# run on a C file it finds the declarations +# +# restrictions: one declaration per line +# doesn't understand struct {...} +# makes assumptions about type names +# +# +# some awks need double escapes on strings used as +# regular expressions. If not run on mawk, use gdecl.awk + + +################################################ +# lexical scanner -- gobble() +# input : string s -- treated as a regular expression +# gobble eats SPACE, then eats longest match of s off front +# of global variable line. +# Cuts the matched part off of line +# + + +function gobble(s, x) +{ + sub( /^ /, "", line) # eat SPACE if any + + # surround s with parentheses to make sure ^ acts on the + # whole thing + + match(line, "^" "(" s ")") + x = substr(line, 1, RLENGTH) + line = substr(line, RLENGTH+1) + return x +} + + +function ptr_to(n, x) # return "pointer to" repeated n times +{ n = int(n) + if ( n <= 0 ) return "" + x = "pointer to" ; n-- + while ( n-- ) x = x " pointer to" + return x +} + + +#recursively get a decl +# returns an English description of the declaration or +# "" if not a C declaration. + +function decl( x, t, ptr_part) +{ + + x = gobble("[* ]+") # get list of *** ... 
+ gsub(/ /, "", x) # remove all SPACES + ptr_part = ptr_to( length(x) ) + + # We expect to see either an identifier or '(' + # + + if ( gobble("\(") ) + { + # this is the recursive descent part + # we expect to match a declaration and closing ')' + # If not return "" to indicate failure + + if ( (x = decl()) == "" || gobble( "\)" ) == "" ) return "" + + } + else # expecting an identifier + { + if ( (x = gobble(id)) == "" ) return "" + x = x ":" + } + + # finally look for () + # or [ opt_size ] + + while ( 1 ) + if ( gobble( funct_mark ) ) x = x " function returning" + else + if ( t = gobble( array_mark ) ) + { gsub(/ /, "", t) + x = x " array" t " of" + } + else break + + + x = x " " ptr_part + return x +} + + +BEGIN { id = "[_A-Za-z][_A-Za-z0-9]*" + funct_mark = "\([ \t]*\)" + array_mark = "\[[ \t]*[_A-Za-z0-9]*[ \t]*\]" + +# I've assumed types are keywords or all CAPS or end in _t +# Other conventions could be added. + + type0 = "int|char|short|long|double|float|void" + type1 = "[_A-Z][_A-Z0-9]*" # types are CAPS + type2 = "[_A-Za-z][_A-Za-z0-9]*_t" # end in _t + + types = "(" type0 "|" type1 "|" type2 ")" +} + + +{ + + gsub( "/\*([^*]|\*[^/])*(\*/|$)" , " ") # remove comments + gsub( /[ \t]+/, " ") # squeeze white space to a single space + + + line = $0 + + scope = gobble( "extern|static" ) + + if ( type = gobble("(struct|union|enum) ") ) + type = type gobble(id) # get the tag + else + { + + type = gobble("(un)?signed ") gobble( types ) + + } + + if ( ! 
type ) next + + if ( (x = decl()) && gobble( ";") ) + { + x = x " " type + if ( scope ) x = x " (" scope ")" + gsub( / +/, " ", x) # + print x + } + +} + + + + diff --git a/src/libmawk/examples/deps.awk b/src/libmawk/examples/deps.awk new file mode 100644 index 0000000..f5d2c53 --- /dev/null +++ b/src/libmawk/examples/deps.awk @@ -0,0 +1,57 @@ + +# find include dependencies in C source +# +# mawk -f deps.awk C_source_files +# -- prints a dependency list suitable for make +# -- ignores #include < > +# + + +BEGIN { stack_index = 0 # stack[] holds the input files + + for(i = 1 ; i < ARGC ; i++) + { + file = ARGV[i] + if ( file !~ /\.[cC]$/ ) continue # skip it + outfile = substr(file, 1, length(file)-2) ".o" + + # INCLUDED[] stores the set of included files + # -- start with the empty set + for( j in INCLUDED ) delete INCLUDED[j] + + while ( 1 ) + { + if ( getline line < file <= 0 ) # no open or EOF + { close(file) + if ( stack_index == 0 ) break # empty stack + else + { file = stack[ stack_index-- ] + continue + } + } + + if ( line ~ /^#include[ \t]+".*"/ ) + { + split(line, X, "\"") # filename is in X[2] + + if ( X[2] in INCLUDED ) # we've already included it + continue + + #push current file + stack[ ++stack_index ] = file + INCLUDED[ file = X[2] ] = "" + } + } # end of while + + # test if INCLUDED is empty + flag = 0 # on once the front is printed + for( j in INCLUDED ) + if ( ! 
flag ) + { printf "%s : %s" , outfile, j ; flag = 1 } + else printf " %s" , j + + if ( flag ) print "" + + }# end of loop over files in ARGV[i] + +} diff --git a/src/libmawk/examples/eatc.awk b/src/libmawk/examples/eatc.awk new file mode 100644 index 0000000..c2c64ba --- /dev/null +++ b/src/libmawk/examples/eatc.awk @@ -0,0 +1,32 @@ + + +# eatc.awk +# another program to remove comments +# + + +{ while( t = index($0 , "/*") ) + { + printf "%s" , substr($0,1,t-1) + $0 = eat_comment( substr($0, t+2) ) + } + + print +} + + +function eat_comment(s, t) +{ + #replace comment by one space + printf " " + + while ( (t = index(s, "*/")) == 0 ) + if ( getline s == 0 ) + { # end of input -- unterminated comment + system("/bin/sh -c 'echo unterminated comment' 1>&2") + exit 1 + } + + return substr(s,t+2) +} + diff --git a/src/libmawk/examples/gdecl.awk b/src/libmawk/examples/gdecl.awk new file mode 100644 index 0000000..f99773b --- /dev/null +++ b/src/libmawk/examples/gdecl.awk @@ -0,0 +1,136 @@ + +# parse a C declaration by recursive descent +# +# decl.awk with extra escapes \ + +################################################ +############################################ + + +# lexical scanner -- gobble() +# input : string s -- treated as a regular expression +# gobble eats SPACE, then eats longest match of s off front +# of global variable line. +# Cuts the matched part off of line +# + + +function gobble(s, x) +{ + sub( /^ /, "", line) # eat SPACE if any + + # surround s with parentheses to make sure ^ acts on the + # whole thing + + match(line, "^" "(" s ")") + x = substr(line, 1, RLENGTH) + line = substr(line, RLENGTH+1) + return x +} + + +function ptr_to(n, x) # return "pointer to" repeated n times +{ n = int(n) + if ( n <= 0 ) return "" + x = "pointer to" ; n-- + while ( n-- ) x = x " pointer to" + return x +} + + +#recursively get a decl +# returns an English description of the declaration or +# "" if not a C declaration. 
+ +function decl( x, t, ptr_part) +{ + + x = gobble("[* ]+") # get list of *** ... + gsub(/ /, "", x) # remove all SPACES + ptr_part = ptr_to( length(x) ) + + # We expect to see either an identifier or '(' + # + + if ( gobble("\\(") ) + { + # this is the recursive descent part + # we expect to match a declaration and closing ')' + # If not return "" to indicate failure + + if ( (x = decl()) == "" || gobble( "\\)" ) == "" ) return "" + + } + else # expecting an identifier + { + if ( (x = gobble(id)) == "" ) return "" + x = x ":" + } + + # finally look for () + # or [ opt_size ] + + while ( 1 ) + if ( gobble( funct_mark ) ) x = x " function returning" + else + if ( t = gobble( array_mark ) ) + { gsub(/ /, "", t) + x = x " array" t " of" + } + else break + + + x = x " " ptr_part + return x +} + + +BEGIN { id = "[_A-Za-z][_A-Za-z0-9]*" + funct_mark = "\\([ \t]*\\)" + array_mark = "\\[[ \t]*[_A-Za-z0-9]*[ \t]*\\]" + +# I've assumed types are keywords or all CAPS or end in _t +# Other conventions could be added. + + type0 = "int|char|short|long|double|float|void" + type1 = "[_A-Z][_A-Z0-9]*" # types are CAPS + type2 = "[_A-Za-z][_A-Za-z0-9]*_t" # end in _t + + types = "(" type0 "|" type1 "|" type2 ")" +} + + +{ + + gsub( /\/\*([^*]|\*[^\/])*(\*\/|$)/ , " ") # remove comments + gsub( /[ \t]+/, " ") # squeeze white space to a single space + + + line = $0 + + scope = gobble( "extern|static" ) + + if ( type = gobble("(struct|union|enum) ") ) + type = type gobble(id) # get the tag + else + { + + type = gobble("(un)?signed ") gobble( types ) + + } + + if ( ! 
type ) next + + if ( (x = decl()) && gobble( ";") ) + { + x = x " " type + if ( scope ) x = x " (" scope ")" + gsub( / +/, " ", x) # + print x + } + +} + + + + diff --git a/src/libmawk/examples/hcal b/src/libmawk/examples/hcal new file mode 100755 index 0000000..8aaeb05 --- /dev/null +++ b/src/libmawk/examples/hcal @@ -0,0 +1,418 @@ +#!/usr/local/bin/mawk -We +# edit the above to be the full pathname of 'mawk' +# @(#) hcal - v01.00.02 - Tue Feb 27 21:21:21 EST 1996 +# @(#) prints a 3-month (highlighted) calendar centered on the target month +# @(#) may be edited for week to start with Sun or Mon & for local language +# @(#) to display a usage screen, execute: hcal -h +# NOTE: to edit, set ts=4 in 'vi' (or equivalent) +# to print, pipe through 'pr -t -e4' + +# Using ideas from a KornShell script by Mikhail Kuperblum (mikhail@klm.com) +# Bob Stockler - bob@trebor.iglou.com - Sysop CompuServe SCOForum [75162,1612] + +BEGIN { +# Local Edits: + PROG = "hcal" # Program name given to this script +# FMT = 0 # date format dd/mm/yyyy +# FMT1 = 0 # for weekdays ordered "Mo Tu We Th Fr Sa Su" + FMT = 1 # date format mm/dd/yyyy + FMT1 = 1 # for weekdays ordered "Su Mo Tu We Th Fr Sa" +# edit day & month names and abbreviations for local language names + Days[0] = "Mo Tu We Th Fr Sa Su" + Days[1] = "Su Mo Tu We Th Fr Sa" + MONTHS = "January February March April May June July August" + MONTHS = MONTHS " September October November December" + Months = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec" +# STDOUT = 0 # emulate SCO Unix 'cal' (NO highlighting) + STDOUT = 1 # default to highlight mode + MINUS = "-" # possible input date field delimiter + SLASH = "/" # possible input date field delimiter + DOT = "." 
# possible input date field delimiter + IDFD = "[" MINUS # make MINUS the first character in this series + IDFD = IDFD SLASH # so that it stands for itself in the RE + IDFD = IDFD DOT "]" # Input Date Field Delimiters RE + ODFD = SLASH # Output Date Field Delimiter (default) + DATE_FMT = "%.2d%s%.2d%s%.4d" # date format +## this script presumes 'date' recognizes these arguments in these ways: +## w - Day of the week - Sunday = 0 +## m - Month of year - 01 to 12 +## d - Day of month - 01 to 31 +## y Last 2 digits of year - 00 to 99 +## Y - Year (including century), as decimal numbers +## j - Day of the year - 001 to 366 (Julian date) +## T - Time as HH:MM:SS +## X Current time, as defined by the locale +## a - Abbreviated weekday - Sun to Sat +## b - Abbreviated month name +## Z - Timezone name, or no characters if no timezone exists +## Command to get today's date information: +## DATE = "/bin/date '+%w %m %d 19%y %j~%a %b %d %T %Z 19%y'" +## For sunos4 +## DATE = DATE = "/bin/date '+%w %m %d 19%y %j~%a %h %d %T 19%y'" + DATE = "/bin/date '+%w %m %d %Y %j~%a %b %d %X %Z %Y'" +# End of Local Edits + + INT_RE = "^[0-9]+$" # unsigned integer RE + S_INT_RE = "^[-+][0-9]+$" # signed integer RE + MNAM_RE = "^[A-Za-z]+$" # month name RE + YEAR_RE = "^[0-9]?[0-9]?[0-9]?[0-9]$" + DATE_RE = "^[0-9]?[0-9]" IDFD "[0-9]?[0-9]" IDFD "[0-9]?[0-9]?[0-9]?[0-9]$" + DAT1_RE = "^[0-9]?[0-9]" IDFD "[0-9]?[0-9]$" + + split(Months,M_Name) + split("31 28 31 30 31 30 31 31 30 31 30 31",Mdays) ; Mdays[0] = 0 + + NUM_ARGS = ARGC - 1 + if ( ARGV[1] == "-x" ) { + # standout mode switch + if ( STDOUT == 1 ) STDOUT = 0 ; else STDOUT = 1 + ARG1 = ARGV[2] ; ARG2 = ARGV[3] ; NUM_ARGS -= 1 + } + else if ( ARGV[1] ~ /^-[h?]$/ ) { HELP = 1 ; exit } + else { ARG1 = ARGV[1] ; ARG2 = ARGV[2] } + + if ( STDOUT == 1 ) { + # get the terminal standout-start & standout-end control codes + so = ENVIRON["so"] ; if ( ! so ) "tput smso" | getline so + se = ENVIRON["se"] ; if ( ! 
se ) "tput rmso" | getline se + } + + if ( NUM_ARGS == 0 ) { + # no arguments - print a calendar display centered on today + DEFAULT = 1 + } + else if ( NUM_ARGS == 1 ) { + # one argument - may be a month name, date, year, or interval of days + if ( ARG1 ~ DATE_RE ) DATE1 = Fmt_Date(ARG1) + else if ( ARG1 ~ DAT1_RE ) DATE1 = ARG1 + else if ( ARG1 ~ MNAM_RE ) { Get_Mnum() ; DATE1 = RMSO = ARG1 "/1" } + else if ( ARG1 ~ S_INT_RE ) INTERVAL = ARG1 + 0 + else if ( ARG1 ~ INT_RE ) { + if ( ARG1 > 0 && ARG1 <= 9999 ) YEAR = ARG1 + 0 + else if ( ARG1 > 9999 ) { ERR = 9 ; exit } + else { ERR = 7 ; exit } + } + else { ERR = 1 ; exit } + } + else if ( NUM_ARGS == 2 ) { + # two arguments, the second of which must be an integer + if ( ARG2 ~ INT_RE ) { + ARG2 = ARG2 + 0 + if ( ARG2 < 1 ) { ERR = 7 ; exit } + else if ( ARG2 > 9999 ) { ERR = 9 ; exit } + } + else { ERR = 1 ; exit } + RMSO = 1 + # the first may be a string or an integer + if ( ARG1 ~ INT_RE ) { + # a month number and a year + if ( ARG1 < 1 || ARG1 > 12 ) { ERR = 4 ; mm = ARG1 ; exit } + } + else if ( ARG1 ~ MNAM_RE ) { + Get_Mnum() + } + else { ERR = 6 ; exit } + DATE1 = ARG1 "/1/" ARG2 + } + else { ERR = 2 ; exit } + + if ( DEFAULT ) { Get_Now() } + else if ( INTERVAL ) { + Get_Now() + daynum = daynum + ( INTERVAL % 7 ) + this_date = "" + DATE1 = Get_Date(INTERVAL,m,d,y,j) + split(DATE1,mdy,IDFD) + Mon[2] = mdy[1] + 0 + today = mdy[2] + 0 + Year[1] = Year[2] = Year[3] = mdy[3] + 0 + } + else if ( DATE1 ) { + Get_Now() + if ( split(DATE1,mdy,IDFD) == 2 ) DATE1 = DATE1 "/" This_Year + Chk_Date(DATE1) + Mon[2] = mdy[1] + 0 + today = mdy[2] + 0 + Year[1] = Year[2] = Year[3] = mdy[3] + 0 + DATE1 = sprintf( "%.2d/%.2d/%.4d", Mon[2], today, Year[2] ) + INTERVAL = Get_Num(DATE1,m,d,y,j) + daynum = daynum + ( INTERVAL % 7 ) + this_date = "" + } + else if ( YEAR ) { + so = se = "" + Get_Now() + Mon[2] = 2 + today = 1 + Year[1] = Year[2] = Year[3] = YEAR + DATE1 = sprintf( "%.2d/%.2d/%.4d", Mon[2], today, Year[2] ) + 
INTERVAL = Get_Num(DATE1,m,d,y,j) + daynum = daynum + ( INTERVAL % 7 ) + this_date = "" + } + else { ERR = 5 ; exit } + + if ( Mon[2] != 1 ) Mon[1] = Mon[2] - 1 + else { Mon[1] = 12 ; Year[1] -= 1 } + if ( Mon[2] != 12 ) Mon[3] = Mon[2] + 1 + else { Mon[3] = 1 ; Year[3] += 1 } + if ( Mon[1] == 2 ) Leap(Year[1]) + else if ( Mon[2] == 2 ) Leap(Year[2]) + else if ( Mon[3] == 2 ) Leap(Year[3]) + + Start[2] = 7 - ( ( today - daynum ) % 7 ) + Start[1] = 7 - ( ( Mdays[Mon[1]] - Start[2] ) % 7 ) + Start[3] = ( Mdays[Mon[2]] + Start[2] ) % 7 + + if ( ! YEAR ) quarters = 1 + else { + quarters = 4 ; s[3] = Start[3] + for (i=4;i<=12;i++) { s[i] = ( Mdays[i-1] + s[i-1] ) % 7 } + } + for ( quarter = 1 ; quarter <= quarters ; quarter++ ) { + if ( quarter > 1 ) { + delete cal + ll = 0 ; Mon[1] += 3 ; Mon[2] += 3 ; Mon[3] += 3 + Start[1] = s[Mon[1]] ; Start[2] = s[Mon[2]] ; Start[3] = s[Mon[3]] + } + if ( Year[2] == 1752 && Mon[2] ~ /8|9|10/ ) Kludge_1752() + if ( ARG1 ) print "" ; else printf( "\n%s\n\n", this_date ) + for (i=1;i<=3;i++) { while ( Start[i] >= 7 ) Start[i] -= 7 } + for (mm=1;mm<=3;mm++) { l = 1 + if ( mm != 2 ) { So = Se = "" } else { So = so ; Se = se } + cal[mm SUBSEP l++] = sprintf( "%s %-4s%.4d %s ", \ + So, M_Name[Mon[mm]], Year[mm], Se ) + cal[mm SUBSEP l++] = sprintf( "%s%3s", Days[FMT1], "" ) + j = k = 1 + while ( j <= Mdays[Mon[mm]] ) { + line = "" + for (i=1;i<=7;i++) { + if ( Start[mm] > 0 || j > Mdays[Mon[mm]] ) { + date = "" ; Start[mm]-- } + else date = j++ + if ( Year[mm] == 1752 && Mon[mm] == 9 && date == 3 ) { + date = 14 ; j = 15 } + if ( date == today && mm == 2 && ! 
RMSO ) { + So = so ; Se = se } + else { So = Se = "" } + line = sprintf( "%s%s%2s%s ", line, So, date, Se ) + } + cal[mm SUBSEP l++] = sprintf( "%s ", line ) + } + if ( l > ll ) ll = l + } + for (l=1;l"/dev/tty" + print usage >"/dev/tty" + exit ERR +} + +function Get_Now() { + # get the week, month, date & year numbers and the time-of-day + DATE | getline date + split(date,Date,"~") + split(Date[1],field) + daynum = field[1] + FMT1 + m = field[2] ; This_Mon = Mon[2] = m + 0 + d = field[3] ; This_Date = today = d + 0 + y = This_Year = Year[1] = Year[2] = Year[3] = field[4] + j = julian = field[5] + 0 + this_date = Date[2] +} + +function Fmt_Date(date) { + # format dates as mm/dd/yyyy or dd/mm/yyyy + split(date,MorD_DorM_Y,IDFD) + if ( FMT == 1 ) { Dt_Fld1 = MorD_DorM_Y[1] ; Dt_Fld2 = MorD_DorM_Y[2] } + else { Dt_Fld1 = MorD_DorM_Y[2] ; Dt_Fld2 = MorD_DorM_Y[1] } + Dt_Fld3 = MorD_DorM_Y[3] + return sprintf( DATE_FMT, Dt_Fld1, ODFD, Dt_Fld2, ODFD, Dt_Fld3 ) +} + +function Kludge_1752() { + # kludge for September 1752 & the change to the Gregorian Calendar + Mdays[9] = 30 + if ( Mon[2] == 9 ) { + Start[1] = Start[2] = 1 + FMT1 ; Start[3] = -1 + FMT1 + } + else if ( Mon[2] == 8 ) { + Start[1] = 2 + FMT1 ; Start[2] = 5 + FMT1 ; Start[3] = 1 + FMT1 + } + else if ( Mon[2] == 10 ) { + Start[1] = 1 + FMT1 ; Start[2] = -1 + FMT1 ; Start[3] = 3 + } +} + +function Get_Mnum() { + ARG1 = tolower(ARG1) + months = tolower(MONTHS) + split(months,month) + for (i=1;i<=12;i++) { + if ( index(month[i],ARG1) == 1 ) { ARG = i ; n++ } + } + if ( n == 1 ) ARG1 = ARG + else if ( n == 0 ) { ERR = 1 ; exit } + else { ERR = 8 ; exit } +} + +function Get_Num(date,m,d,y,j) { + # get the number of days from one date to another date + NOW = y m d ; N = 0 ; M = m + 0 ; D = d + 0 ; Y = y + 0 ; J = j + 0 + split(date,mdy,IDFD) + M2 = mdy[1] ; D2 = mdy[2] ; Y2 = mdy[3] + THEN = Y2 M2 D2 ; M2 = M2 + 0 ; D2 = D2 + 0 ; Y2 = Y2 + 0 + Leap(Y2) + if ( M2 > 12 ) { ERR = 4 ; exit } + if ( D2 > Mdays[M2] && Y2 
!= 1752 && M2 != 9 ) { ERR = 5 ; exit } + if ( THEN ~ /^1752090[3-9]$|^1752091[0-3]$/ ) { ERR = 6 ; exit } + Leap(Y) + if ( THEN > NOW ) { + Ydays = Ydays - J + 1 ; mdays = Mdays[M] - D + 1 + while ( Y < Y2 ) Next_Y() + while ( M < M2 ) Next_M() + while ( D < D2 ) Next_D() + N *= -1 + } + else { + Ydays = J ; mdays = D + while ( Y > Y2 ) Prev_Y() + while ( M > M2 ) Prev_M() + if ( Y == 1752 && M == 9 && D == 19 ) D = 30 + while ( D > D2 ) Prev_D() + } + return N +} + +function Get_Date(n,m,d,y,j) { + # get the date a number of days before or after a date + N = n + 0 ; M = m + 0 ; D = d + 0 ; Y = y + 0 ; J = j + 0 + if ( N != 0 ) { + Leap(Y) + if ( N > 0 ) { + Ydays = Ydays - J + 1 ; mdays = Mdays[M] - D + 1 + while ( N >= Ydays ) { Next_Y() ; Leap(Y) } + while ( N >= ( ( mdays > 0 ) ? mdays : Mdays[M] ) ) { Next_M() } + while ( N > 0 ) Next_D() + } + else { + Ydays = J ; mdays = D ; N *= -1 + while ( N >= Ydays ) { Prev_Y() ; Leap(Y) } + while ( N >= ( ( mdays > 0 ) ? mdays : Mdays[M] ) ) { Prev_M() } + if ( Y == 1752 && M == 9 && D == 19 ) D = 30 + while ( N > 0 ) Prev_D() + } + if ( Y < 1 ) { ERR = 3 ; exit } + } + return M ODFD D ODFD Y +} + +function Leap(YR) { + # adjust for Leap Years + if ( YR % 4 == 0 && ( YR % 100 != 0 || YR % 400 == 0 || YR < 1800 ) ) { + Ydays = 366 ; Mdays[2] = 29 } + else { Ydays = 365 ; Mdays[2] = 28 } + if ( YR != 1752 ) Mdays[9] = 30 + else { Ydays = 355 ; Mdays[9] = 19 } +} + +function Chk_Date(date) { + # check validity of input dates + split(date,mdy,IDFD) + mm = mdy[1] + 0 ; dd = mdy[2] + 0 ; yy = mdy[3] + 0 + if ( mm == 2 ) Leap(yy) + if ( yy < 1 ) { ERR = 3 ; exit } + if ( mm < 1 || mm > 12 ) { ERR = 4 ; exit } + if ( dd < 1 || dd > Mdays[mm] ) { ERR = 5 ; exit } +} + +# day counting functions for next or previous year, month and day +function Next_Y() { + N -= Ydays ; Y += 1 ; M = 1 ; D = 1 ; mdays = 0 ; Leap(Y) +} +function Next_M() { + if ( mdays != 0 ) N -= mdays ; else N -= Mdays[M] + M += 1 ; D = 1 ; mdays = 0 +} 
+function Next_D() { + N -= 1 ; D += 1 + if ( D > Mdays[M] ) { M += 1 ; D = 1 } + else if ( Y == 1752 && M == 9 && D == 2 ) D = 13 +} +function Prev_Y() { + N -= Ydays ; Y -= 1 ; M = 12 ; D = 31 ; mdays = 0 ; Leap(Y) +} +function Prev_M() { + if ( mdays != 0 ) N -= mdays ; else N -= Mdays[M] + M -= 1 ; D = Mdays[M] ; mdays = 0 +} +function Prev_D() { + N -= 1 ; D -= 1 ; if ( Y == 1752 && M == 9 && D == 13 ) D = 2 +} + +function Get_J(m,d,y) { + # get the Julian date for an input date + m = m + 0 ; d = d + 0 ; y = y + 0 + Leap(y) + j = d + for (i=1;i= 7 ) Start[i] -= 7 } + for (mm=1;mm<=3;mm++) { + if ( Year[mm] != Year[mm-1] ) + printf( "%s %s %s\\n", so, Year[mm], se ) + if ( mm == 1 ) printf( "%s %s %s\\n", so, Header[FMT], se ) + j = k = 1 + while ( j <= M_Len[Mon[mm]] ) { + line = "" + for (i=1;i<=7;i++) { + if ( Start[mm] > 0 || j > M_Len[Mon[mm]] ) { date = "" ; Start[mm]-- } + else date = j++ + if ( mm == 2 && date == today ) { So = so ; Se = se } + else { So = Se = "" } + line = sprintf( "%s%s%2s%s ", line, So, date, Se ) + } + m1 = substr(M_Name[Mon[mm]],k++,1) + printf( "%s %1s %s %s%s %s\\n", so, m1, se, line, so, se ) + } + } + printf( time_fmt, so, time_is, time, se ) +}' >$prog + +date +"$DATE_ARGS" | ${AWK:=mawk} -f $prog so=$so se=$se + +exit 0 + +# EOF 'hical' - Tue Dec 19 19:19:19 EST 1994 +# Bob Stockler - bob@trebor.iglou.com - CIS: 72726,452 diff --git a/src/libmawk/examples/nocomment.awk b/src/libmawk/examples/nocomment.awk new file mode 100644 index 0000000..a10d93f --- /dev/null +++ b/src/libmawk/examples/nocomment.awk @@ -0,0 +1,30 @@ + +# remove C comments from a list of files +# using a comment as the record separator +# +# this is trickier than I first thought +# The first version in .97-.9993 was wrong + +BEGIN { + # RS is set to a comment (this is mildly tricky, I blew it here + RS = "/\*([^*]|\*+[^*/])*\*+/" + ORS = " " + getline hold + filename = FILENAME +} + +# if changing files +filename != FILENAME { + filename = FILENAME + 
printf "%s" , hold + hold = $0 + next +} + +{ # hold one record because we don't want ORS on the last + # record in each file + print hold + hold = $0 +} + +END { printf "%s", hold } diff --git a/src/libmawk/examples/primes.awk b/src/libmawk/examples/primes.awk new file mode 100644 index 0000000..0341424 --- /dev/null +++ b/src/libmawk/examples/primes.awk @@ -0,0 +1,62 @@ + +# primes.awk +# +# mawk -f primes.awk [START] STOP +# find all primes between 2 and STOP +# or START and STOP +# + + + +function usage() +{ ustr = sprintf("usage: %s [start] stop", ARGV[0]) + system( "echo " ustr) + exit 1 +} + + +BEGIN { if (ARGC == 1 || ARGC > 3 ) usage() + if ( ARGC == 2 ) { start = 2 ; stop = ARGV[1]+0 } + else + if ( ARGC == 3 ) { start = ARGV[1]+0 ; stop = ARGV[2]+0 } + + if ( start < 2 ) start = 2 + if ( stop < start ) stop = start + + prime[ p_cnt = 1 ] = 3 # keep primes in prime[] + +# keep track of integer part of square root by adding +# odd integers + odd = test = 5 + root = 2 + squares = 9 + + +while ( test <= stop ) +{ + if ( test >= squares ) + { root++ + odd += 2 + squares += odd + } + + flag = 1 + for ( i = 1 ; prime[i] <= root ; i++ ) + if ( test % prime[i] == 0 ) # not prime + { flag = 0 ; break } + + if ( flag ) prime[ ++p_cnt ] = test + + test += 2 +} + +prime[0] = 2 + +for( i = 0 ; i <= p_cnt && prime[i] < start ; i++) ; # bounded by p_cnt: an unset prime[i] compares as 0 and would loop forever + +for ( ; i <= p_cnt && prime[i] <= stop ; i++ ) print prime[i] # the seeded prime 3 may exceed stop (stop == 2) + +} + + + diff --git a/src/libmawk/examples/qsort.awk b/src/libmawk/examples/qsort.awk new file mode 100644 index 0000000..3992541 --- /dev/null +++ b/src/libmawk/examples/qsort.awk @@ -0,0 +1,78 @@ + + +# qsort text files +# + +function middle(x,y,z) #return middle of 3 +{ + if ( x <= y ) + { if ( z >= y ) return y + if ( z < x ) return x + return z + } + + if ( z >= x ) return x + if ( z < y ) return y + return z +} + + +function isort(A , n, i, j, hold) +{ + # if needed a sentinal at A[0] will be created + + for( i = 2 ; i <= n ; i++) + { + hold = A[ j = i ] + while ( A[j-1] > hold ) + { j-- ; A[j+1] = 
A[j] } + + A[j] = hold + } +} + + +# recursive quicksort +function qsort(A, left, right ,i , j, pivot, hold) +{ + + pivot = middle(A[left], A[int((left+right)/2)], A[right]) + + i = left + j = right + + while ( i <= j ) + { + while ( A[i] < pivot ) i++ + while ( A[j] > pivot ) j-- + + if ( i <= j ) + { hold = A[i] + A[i++] = A[j] + A[j--] = hold + } + } + + if ( j - left > BLOCK ) qsort(A,left,j) + if ( right - i > BLOCK ) qsort(A,i,right) +} + +BEGIN { BLOCK = 5 } + + +{ line[NR] = $0 "" # sort as string +} + +END { + + if ( NR > BLOCK ) qsort(line, 1, NR) + + isort(line, NR) + + for(i = 1 ; i <= NR ; i++) print line[i] +} + + + + + diff --git a/src/libmawk/execute.c b/src/libmawk/execute.c new file mode 100644 index 0000000..77cda06 --- /dev/null +++ b/src/libmawk/execute.c @@ -0,0 +1,1999 @@ +/******************************************** +mawk_execute.c + +libmawk changes (C) 2009-2014, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991-1996, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +#include "mawk.h" +#include "code.h" +#include "memory.h" +#include "symtype.h" +#include "field.h" +#include "bi_funct.h" +#include "bi_vars.h" +#include "regexp.h" +#include "repl.h" +#include "fin.h" +#include "debug.h" +#include +#include "num.h" +#include "math_wrap.h" +#include "cell.h" +#include "execute.h" +#include "f2d.h" + + +static double compare(mawk_state_t *, mawk_cell_t *); +static int d_to_index(mawk_state_t *, mawk_num_t); + + +/* The stack machine that mawk_executes the code */ + +#ifdef DEBUG +void DB_mawk_eval_overflow(mawk_state_t * MAWK) +{ + mawk_overflow(MAWK, "eval stack", EVAL_STACK_SIZE); +} +#endif + +/* holds info for array loops (on a stack) */ +typedef struct aloop_state { + struct aloop_state *link; + mawk_cell_t *var; /* for(var in A) */ + mawk_string_t **base; + mawk_string_t **ptr; + mawk_string_t **limit; +} ALOOP_STATE; + +/* clean up aloop stack on next, return, exit */ +#define CLEAR_ALOOP_STACK() if(aloop_state){\ + clear_aloop_stack(MAWK, aloop_state);\ + aloop_state=(ALOOP_STATE*)0;}else + +/* free the vector (aloop_state's ->base array and its members) */ +static void aloop_free_vect(mawk_state_t *MAWK, ALOOP_STATE *top) +{ + while (top->ptr < top->limit) { + free_STRING(*top->ptr); + top->ptr++; + } + + if (top->base != NULL) + mawk_free(MAWK, top->base); +} + + +static void clear_aloop_stack(mawk_state_t *MAWK, ALOOP_STATE *top) +{ + ALOOP_STATE *q; + do { + aloop_free_vect(MAWK, top); + q = top; + top = q->link; + MAWK_ZFREE(MAWK, q); + } while (top); +} + +mawk_cell_t *mawk_call_c_func(mawk_state_t * MAWK, mawk_cell_t * sp, FBLOCK * fbp, int a_args) +{ + /* unresolved functions must be C function calls */ + if (fbp->code == NULL) { + SYMTAB *sym; + void *old_userdata; + + sym = mawk_find(MAWK, fbp->name, 1); + if (sym->type == ST_C_FUNCTION) { + libmawk_c_function *old_func; + + sp[1].type = ST_NONE; + old_userdata = MAWK->func_userdata; + old_func = 
MAWK->func_being_called; + MAWK->func_userdata = sym->stval.c_function.func_userdata; + MAWK->func_being_called = sym->stval.c_function.callback; + sp = sym->stval.c_function.callback(MAWK, sp, a_args) + 1; + MAWK->func_userdata = old_userdata; + MAWK->func_being_called = old_func; + } + else { + mawk_rt_error(MAWK, "unresolved function call %s\n", fbp->name); + } + return sp; + } + return NULL; +} + +#define comment(s) + +#define mawk_call_vars \ + mawk_cell_t *nfp = sp - a_args + 1; comment("new fp for callee") \ + mawk_cell_t *local_p = sp + 1 comment("first local argument on stack") \ + +#define mawk_call_pre() \ +do { \ + char *type_p; comment("pts to type of an argument") \ + int t; \ + if (fbp->nargs) \ + type_p = fbp->typev + a_args - 1; \ + comment("create space for locals");\ + comment("t is number of locals"); \ + t = fbp->nargs - a_args; \ + while (t > 0) { \ + t--; \ + sp++; \ + type_p++; \ + sp->type = C_NOINIT; \ + if (*type_p == ST_LOCAL_ARRAY) \ + sp->ptr = (PTR) mawk_array_new(MAWK, NULL); \ + } \ + if (MAWK->debug_symbols) \ + mawk_debug_callstack_push(MAWK, fbp); \ +} while(0) + +#define mawk_call_post(fbp, a_args_) \ +do { \ + int a_args = a_args_; \ + mawk_call_vars; \ + char *type_p; comment("pts to type of an argument"); \ + if (fbp->nargs) \ + type_p = fbp->typev + a_args - 1; \ + if (MAWK->debug_symbols) \ + mawk_debug_callstack_pop(MAWK); \ + comment("cleanup the callee's arguments"); \ + comment("putting return value at top of eval stack"); \ + if (sp >= nfp) { \ + mawk_cell_t *cp; \ + comment("cp -> the function return"); \ + cp = sp + 1; \ + do { \ + if (*type_p == ST_LOCAL_ARRAY) { \ + if (sp >= local_p) { \ + mawk_array_clear(MAWK, (mawk_array_t)(sp->ptr)); \ + MAWK_ZFREE(MAWK, (mawk_array_t) sp->ptr); \ + } \ + } \ + else \ + mawk_cell_destroy(MAWK, sp); \ + type_p--; \ + sp--; \ + } \ + while (sp >= nfp); \ + break; \ + mawk_cellcpy(MAWK, ++sp, cp); \ + mawk_cell_destroy(MAWK, cp); \ + } \ + else {\ + comment("no arguments 
passed"); \ + sp++; \ + } \ +} while(0) + +/******************* mawk_execute: the VM ********************/ + +enum { + EXEST_NORMAL, /* don't do anything special when popping this frame */ + EXEST_EXIT, /* exit when popping this frame */ + EXEST_RANGE1, /* frame was the pat1 match code for a range */ + EXEST_RANGE2 /* frame was the pat1 match code for a range */ +}; + +#include "execute_debug.h" + +/* cast cp to num and copy the value of the num to res_num (which is mawk_num_t) */ +#define mawk_cast_get_num(res_num, cp) \ + do { \ + if (cp->type != C_NUM) \ + mawk_cast1_to_num(MAWK, cp); \ + res_num = cp->d.dval; \ + } while(0) + +#define RECURSION_OVERHEAD 8 + +#define mawk_push_exe_state(exest) \ +do { \ + inc_sp(); sp->type = C_EXE_STTYPE; sp->d.vcnt = exest; \ + inc_sp(); sp->type = C_EXE_STATE; sp->ptr = cdp; \ + inc_sp(); sp->type = C_EXE_STATE; sp->ptr = fp; \ + inc_sp(); sp->type = C_EXE_STATE; sp->ptr = aloop_state; \ + inc_sp(); sp->type = C_EXE_STATE; sp->ptr = old_stack_base; \ + inc_sp(); sp->type = C_EXE_STATE; sp->ptr = old_sp; \ + inc_sp(); sp->type = C_EXE_STATE; sp->ptr = call_fbp; \ + inc_sp(); sp->type = C_EXE_STATE; sp->d.vcnt = call_a_args; \ + db1printf("state push sp=%d..%d\n", stackptr(sp - RECURSION_OVERHEAD+1), stackptr(sp)); \ +} while(0) + +#define mawk_pop_exe_state()\ +do { \ + if ((sp[1-RECURSION_OVERHEAD].type != C_EXE_STTYPE) || (sp[0].type != C_EXE_STATE)) \ + mawk_bozo(MAWK, "eval stack broken (in recursion)"); \ + exest = sp[-7].d.vcnt; \ + cdp = sp[-6].ptr; \ + fp = sp[-5].ptr; \ + aloop_state = sp[-4].ptr; \ + old_stack_base = sp[-3].ptr; \ + old_sp = sp[-2].ptr; \ + call_fbp = sp[-1].ptr; \ + call_a_args = sp[0].d.vcnt; \ + db1printf("state pop sp=%d..%d exest=%d\n", stackptr(sp - RECURSION_OVERHEAD+1), stackptr(sp), exest); \ + sp -= RECURSION_OVERHEAD; \ +} while(0) + +mawk_exec_result_t mawk_execute_(mawk_state_t *MAWK) +{ + /* --- some useful temporaries (not saved on the eval stack during recursion) --- */ + mawk_cell_t 
*cp; + int t; + double dt; + mawk_num_t tmp_num; + unsigned long runcount; + +#ifdef DEBUG + mawk_cell_t *entry_sp = sp; +#endif + + + /* --- execution state (saved on evan stack during recursion) --- */ + register INST *cdp; /* code ptr, continue execution here */ + register mawk_cell_t *sp; /* eval_stack pointer */ + mawk_cell_t *fp = 0; /* frame ptr into eval_stack for user defined functions */ + + FBLOCK *call_fbp = NULL; /* user function being executed */ + int call_a_args = 0; /* number of caller args in user func being executed */ + + ALOOP_STATE *aloop_state = (ALOOP_STATE *) 0; /* save state for array loops via a stack */ + mawk_cell_t *old_stack_base, *old_sp; /* for moving the eval stack on deep recursion */ + int exest = 0; + + sp = MAWK->sp; + + db1printf("exe enter sp=%d\n", stackptr(sp)); + exe_return:; + db1printf("exe_return sp=%d fp=%d\n", stackptr(sp), stackptr(fp)); + db1printstack(MAWK, "exe_return:\n", sp, fp); + mawk_pop_exe_state(); + if (exest == EXEST_EXIT) { + /* hit the bottom of current execution request; mawk_execute_ either + returns from here because normal end-of-execution or from a getline + that got "NO_MORE" */ + goto out; + } + + call_entry:; + if (fp) { + /* we are a function call, check for deep recursion */ + if (sp > MAWK->stack_danger) { /* change stacks */ + /* it's enough to save one set of stack_base and sp here; by the next time + we get in the danger zone the old values are already saved on the stack by + mawk_push_exe_state(exest) */ + old_stack_base = MAWK->stack_base; + old_sp = sp; + MAWK->stack_base = (mawk_cell_t *) mawk_zmalloc(MAWK, sizeof(mawk_cell_t) * EVAL_STACK_SIZE); + MAWK->stack_danger = MAWK->stack_base + DANGER; + sp = MAWK->stack_base; + /* waste 1 slot for ANSI, actually large model msdos breaks in + RET if we don't */ +#ifdef DEBUG + entry_sp = sp; +#endif + } + else + old_stack_base = (mawk_cell_t *) 0; + } + + runcount = MAWK->runlimit; + while (MAWK->rt_exit_code == 0) { + runcount--; + if 
(runcount == 0) + goto out_runlimit; + switch (cdp++->op) { + +/* HALT only used by the disassemble now ; this remains + so compilers don't offset the jump table */ + case _HALT: + mawk_bozo(MAWK, "ran on halt"); + goto out_exit; + + case _PUSHC: + inc_sp(); + mawk_cellcpy(MAWK, sp, cdp++->ptr); + break; + case _PUSHD: + inc_sp(); + sp->type = C_NUM; + sp->d.dval = *(mawk_num_t *) cdp++->ptr; + break; + + case _PUSHS: + inc_sp(); + sp->type = C_STRING; + sp->ptr = cdp++->ptr; + string(sp)->ref_cnt++; + break; + + case F_PUSHA: + cp = (mawk_cell_t *) cdp->ptr; + if (cp != MAWK->field) { + if (MAWK->nf < 0) + mawk_split_field0(MAWK); + + if (!(cp >= MAWK_NF && cp <= LAST_PFIELD)) { + /* its a real field $1, $2 ... + If its greater than $NF, we have to + make sure its set to "" so that + (++|--) and g?sub() work right + */ + t = mawk_field_addr_to_index(MAWK, cp); + if (t > MAWK->nf) { + mawk_cell_destroy(MAWK, cp); + cp->type = C_STRING; + cp->ptr = (PTR) & (MAWK->null_str); + MAWK->null_str.ref_cnt++; + } + } + } + /* fall thru */ + + case _PUSHA: + case A_PUSHA: + inc_sp(); + sp->type = C_NOINIT; /* normal varref, not C_ARR_REF */ + sp->ptr = cdp++->ptr; + break; + + case _PUSHI: + /* put contents of next address on stack */ + inc_sp(); + mawk_cellcpy(MAWK, sp, cdp++->ptr); + break; + + case L_PUSHI: + /* put the contents of a local var on stack, + cdp->op holds the offset from the frame pointer */ + inc_sp(); + db1printf("pushi fp=%d + %d\n", stackptr(fp), cdp->op); + mawk_cellcpy(MAWK, sp, fp + cdp++->op); + break; + + case L_PUSHA: + /* put a local address on eval stack */ + inc_sp(); + sp->type = C_NOINIT; /* normal varref, not C_ARR_REF */ + sp->ptr = (PTR) (fp + cdp++->op); + break; + + + case F_PUSHI: + + /* push contents of $i + cdp[0] holds & $i , cdp[1] holds i */ + + inc_sp(); + if (MAWK->nf < 0) + mawk_split_field0(MAWK); + cp = (mawk_cell_t *) cdp->ptr; + t = (cdp + 1)->op; + cdp += 2; + + if (t <= MAWK->nf) + mawk_cellcpy(MAWK, sp, cp); + else { /* an 
unset field */ + + sp->type = C_STRING; + sp->ptr = (PTR) & (MAWK->null_str); + MAWK->null_str.ref_cnt++; + } + break; + + case NF_PUSHI: + + inc_sp(); + if (MAWK->nf < 0) + mawk_split_field0(MAWK); + mawk_cellcpy(MAWK, sp, MAWK_NF); + break; + + case FE_PUSHA: + + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + + t = d_to_index(MAWK, sp->d.dval); + if (t && MAWK->nf < 0) + mawk_split_field0(MAWK); + sp->ptr = (PTR) field_ptr(t); + if (t > MAWK->nf) { + /* make sure its set to "" */ + cp = (mawk_cell_t *) sp->ptr; + mawk_cell_destroy(MAWK, cp); + cp->type = C_STRING; + cp->ptr = (PTR) & (MAWK->null_str); + MAWK->null_str.ref_cnt++; + } + break; + + case FE_PUSHI: + + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + + t = d_to_index(MAWK, sp->d.dval); + + if (MAWK->nf < 0) + mawk_split_field0(MAWK); + if (t <= MAWK->nf) + mawk_cellcpy(MAWK, sp, field_ptr(t)); + else { + sp->type = C_STRING; + sp->ptr = (PTR) & MAWK->null_str; + MAWK->null_str.ref_cnt++; + } + break; + + + case AE_PUSHA: /* global lookup and push array mawk_cell_t */ + /* top of stack has an expr, cdp->ptr points at an + array, replace the expr with the bifunct_target arr ref */ + cp = MAWK_ZMALLOC(MAWK, mawk_cell_t); + mawk_cellcpy(MAWK, cp, sp); + mawk_cell_destroy(MAWK, sp); + sp->type = C_ARR_REF_BT; + sp->ptr = (PTR) (mawk_array_t)(cdp->ptr); + sp->d.idx_cell = cp; + cdp++; + break; + + case AE_PUSHA_WRARR: /* global lookup and push array mawk_cell_t _ref_ for a later write */ + { + /* idx in sp, push array ptr to sp+1 */ + inc_sp(); + sp->ptr = (mawk_array_t)(cdp->ptr); + cdp++; + sp->type = C_ARR_REF; + } + break; + + + case AE_PUSHI: + /* top of stack has an expr, cdp->ptr points at an + array, replace the expr with the contents of the + cell inside the array */ + + mawk_array_find(MAWK, (mawk_array_t)(cdp->ptr), sp, sp, MAWK_CREATE); + cdp++; + break; + + case LAE_PUSHI: + /* sp[0] is an expression + cdp->op is offset from frame pointer of a mawk_cell_t which + has an 
mawk_array_t in the ptr field, replace expr + with array[expr] + */ + mawk_array_find(MAWK, (mawk_array_t)(fp[cdp->op].ptr), sp, sp, MAWK_CREATE); + cdp++; + break; + + case LAE_PUSHA: /* local array lookup and push array mawk_cell_t */ + /* sp[0] is an expression + cdp->op is offset from frame pointer of a mawk_cell_t which + has an mawk_array_t in the ptr field, replace expr + with bifunct_target arr ref + */ + cp = MAWK_ZMALLOC(MAWK, mawk_cell_t); + mawk_cellcpy(MAWK, cp, sp); + mawk_cell_destroy(MAWK, sp); + sp->type = C_ARR_REF_BT; + sp->ptr = (PTR) (mawk_array_t)(fp[cdp->op].ptr); + sp->d.idx_cell = cp; + cdp++; + break; + case LAE_PUSHA_WRARR: /* local array lookup and push array mawk_cell_t _ref_ for a later write */ + { + /* idx in sp, push array ptr to sp+1 */ + inc_sp(); + sp->ptr = (mawk_array_t)(fp[cdp->op].ptr); + cdp++; + sp->type = C_ARR_REF; + } + break; + + + case LA_PUSHA: + /* cdp->op is offset from frame pointer of a mawk_cell_t which + has an mawk_array_t in the ptr field. 
Push this ARRAY + on the eval stack + */ + inc_sp(); + sp->type = C_NOINIT; /* normal varref, not C_ARR_REF */ + sp->ptr = fp[cdp++->op].ptr; + break; + + case SET_ALOOP: + { + ALOOP_STATE *ap = MAWK_ZMALLOC(MAWK, ALOOP_STATE); + unsigned vector_size; + + ap->var = (mawk_cell_t *) sp[-1].ptr; + ap->base = ap->ptr = mawk_array_loop_vector(MAWK, (mawk_array_t)(sp->ptr), &vector_size); + ap->limit = ap->base + vector_size; + sp -= 2; + + /* push onto aloop stack */ + ap->link = aloop_state; + aloop_state = ap; + cdp += cdp->op; + } + break; + + case ALOOP: + { + ALOOP_STATE *ap = aloop_state; + if (ap->ptr < ap->limit) { + mawk_cell_destroy(MAWK, ap->var); + ap->var->type = C_STRING; + ap->var->ptr = (PTR) * ap->ptr++; + cdp += cdp->op; + } + else + cdp++; + } + break; + + case POP_AL: + { + /* finish up an array loop */ + ALOOP_STATE *ap = aloop_state; + aloop_state = ap->link; + aloop_free_vect(MAWK, ap); + MAWK_ZFREE(MAWK, ap); + } + break; + + case _POP: + mawk_cell_destroy(MAWK, sp); + sp--; + break; + + case _ASSIGN: + /* top of stack has an expr, next down is an + address, put the expression in *address and + replace the address with the expression */ + + /* don't propagate type C_MBSTRN */ + if (sp->type == C_MBSTRN) + mawk_check_strnum(MAWK, sp); + sp--; + mawk_cell_destroy(MAWK, ((mawk_cell_t *) sp->ptr)); + mawk_cellcpy(MAWK, sp->ptr, sp + 1); + mawk_cellcpy(MAWK, sp, sp->ptr); + mawk_cell_destroy(MAWK, sp + 1); + break; + + case _ASSIGN_ARR: + /* don't propagate type C_MBSTRN */ + if (sp->type == C_MBSTRN) + mawk_check_strnum(MAWK, sp); + sp-=2; + /* sp is the index, sp+1 is the array ref and sp+2 is rvalue expr result */ + mawk_array_set_execute(MAWK, sp, sp+1, sp, sp+2); + break; + + + case F_ASSIGN: + /* assign to a field */ + if (sp->type == C_MBSTRN) + mawk_check_strnum(MAWK, sp); + sp--; + mawk_field_assign(MAWK, (mawk_cell_t *) sp->ptr, sp + 1); + mawk_cell_destroy(MAWK, sp + 1); + mawk_cellcpy(MAWK, sp, (mawk_cell_t *) sp->ptr); + break; + + case 
_ADD_ASG: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + cp = (mawk_cell_t *) (sp - 1)->ptr; + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + P_nansafe1(cp->d.dval, (cp->d.dval + sp->d.dval), sp->d.dval); + sp--; + sp->type = C_NUM; + sp->d.dval = cp->d.dval; + break; + + case _ADD_ASG_ARR: + /* convert the expression result */ + mawk_cast_get_num(tmp_num, sp); + sp-=2; + /* sp is the index, sp+1 is the array ref and sp+2 is rvalue expr result; after this sp+2 is the array val */ + if (!mawk_array_pure(MAWK, sp+1, 1)) { + mawk_array_getnum_execute(MAWK, sp+2, sp+1, sp); + P_nansafe1(sp[2].d.dval, (sp[2].d.dval + tmp_num), tmp_num); + mawk_array_set_execute(MAWK, sp, sp+1, sp, sp+2); + } + else { + mawk_array_getptr_execute(MAWK, cp, sp+1, sp); + P_nansafe1(cp->d.dval, (cp->d.dval + tmp_num), tmp_num); + } + break; + + case _SUB_ASG: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + cp = (mawk_cell_t *) (sp - 1)->ptr; + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + P_nansafe1(cp->d.dval, (cp->d.dval - sp->d.dval), sp->d.dval); + sp--; + sp->type = C_NUM; + sp->d.dval = cp->d.dval; + break; + + case _SUB_ASG_ARR: + /* convert the expression result */ + mawk_cast_get_num(tmp_num, sp); + sp-=2; + /* sp is the index, sp+1 is the array ref and sp+2 is rvalue expr result; after this sp+2 is the array val */ + if (!mawk_array_pure(MAWK, sp+1, 1)) { + mawk_array_getnum_execute(MAWK, sp+2, sp+1, sp); + P_nansafe1(sp[2].d.dval, (sp[2].d.dval - tmp_num), tmp_num); + mawk_array_set_execute(MAWK, sp, sp+1, sp, sp+2); + } + else { + mawk_array_getptr_execute(MAWK, cp, sp+1, sp); + P_nansafe1(cp->d.dval, (cp->d.dval - tmp_num), tmp_num); + } + break; + + case _MUL_ASG: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + cp = (mawk_cell_t *) (sp - 1)->ptr; + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + P_nansafe1(cp->d.dval, (cp->d.dval * sp->d.dval), sp->d.dval); + sp--; + sp->type = C_NUM; + sp->d.dval = cp->d.dval; + break; 
+ + case _MUL_ASG_ARR: + /* convert the expression result */ + mawk_cast_get_num(tmp_num, sp); + sp-=2; + /* sp is the index, sp+1 is the array ref and sp+2 is rvalue expr result; after this sp+2 is the array val */ + if (!mawk_array_pure(MAWK, sp+1, 1)) { + mawk_array_getnum_execute(MAWK, sp+2, sp+1, sp); + P_nansafe1(sp[2].d.dval, (sp[2].d.dval * tmp_num), tmp_num); + mawk_array_set_execute(MAWK, sp, sp+1, sp, sp+2); + } + else { + mawk_array_getptr_execute(MAWK, cp, sp+1, sp); + P_nansafe1(cp->d.dval, (cp->d.dval * tmp_num), tmp_num); + } + break; + + case _DIV_ASG: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + cp = (mawk_cell_t *) (sp - 1)->ptr; + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + +#ifdef MAWK_NO_FLOAT + { + mawk_num_t d; + d = sp--->d.dval; + if (d != MAWK_NUM_ZERO) + cp->d.dval /= d; + else + cp->d.dval = P_nan(); + } +#else + { + P_nansafe1(cp->d.dval, (cp->d.dval / sp->d.dval), sp->d.dval); + sp--; + } +#endif + sp->type = C_NUM; + sp->d.dval = cp->d.dval; + break; + + + case _DIV_ASG_ARR: + /* convert the expression result */ + mawk_cast_get_num(tmp_num, sp); + sp-=2; + /* sp is the index, sp+1 is the array ref and sp+2 is rvalue expr result; after this sp+2 is the array val */ + if (!mawk_array_pure(MAWK, sp+1, 1)) { + mawk_array_getnum_execute(MAWK, sp+2, sp+1, sp); + +#ifdef MAWK_NO_FLOAT + if (tmp_num != MAWK_NUM_ZERO) + sp[2].d.dval /= tmp_num; + else + sp[2].d.dval = P_nan(); +#else + P_nansafe1(sp[2].d.dval, (sp[2].d.dval / tmp_num), tmp_num); +#endif + mawk_array_set_execute(MAWK, sp, sp+1, sp, sp+2); + } + else { + mawk_array_getptr_execute(MAWK, cp, sp+1, sp); +#ifdef MAWK_NO_FLOAT + if (tmp_num != MAWK_NUM_ZERO) + cp->d.dval /= tmp_num; + else + cp->d.dval = P_nan(); +#else + P_nansafe1(cp->d.dval, (cp->d.dval / tmp_num), tmp_num); +#endif + } + break; + + case _MOD_ASG: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + cp = (mawk_cell_t *) (sp - 1)->ptr; + if (cp->type != C_NUM) + 
mawk_cast1_to_num(MAWK, cp); + + if (P_isnan_manual(cp->d.dval) || P_isnan_manual(sp->d.dval)) + cp->d.dval = P_nan(); + else { +#ifdef MAWK_NO_FLOAT + cp->d.dval %= sp->d.dval; +#else + cp->d.dval = P_fmod(cp->d.dval, sp->d.dval); +#endif + } + sp--; + + sp->type = C_NUM; + sp->d.dval = cp->d.dval; + break; + + case _MOD_ASG_ARR: + /* convert the expression result */ + mawk_cast_get_num(tmp_num, sp); + sp-=2; + /* sp is the index, sp+1 is the array ref and sp+2 is rvalue expr result; after this sp+2 is the array val */ + if (!mawk_array_pure(MAWK, sp+1, 1)) { + mawk_array_getnum_execute(MAWK, sp+2, sp+1, sp); + +#ifdef MAWK_NO_FLOAT + sp[2].d.dval %= tmp_num; +#else + sp[2].d.dval = P_fmod(sp[2].d.dval, tmp_num); +#endif + mawk_array_set_execute(MAWK, sp, sp+1, sp, sp+2); + } + else { + mawk_array_getptr_execute(MAWK, cp, sp+1, sp); +#ifdef MAWK_NO_FLOAT + cp->d.dval %= tmp_num; +#else + cp->d.dval = P_fmod(cp->d.dval, tmp_num); +#endif + } + break; + + + case _POW_ASG: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + cp = (mawk_cell_t *) (sp - 1)->ptr; + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + cp->d.dval = mawk_num_pow(cp->d.dval, sp--->d.dval); + sp->type = C_NUM; + sp->d.dval = cp->d.dval; + break; + + case _POW_ASG_ARR: + /* convert the expression result */ + mawk_cast_get_num(tmp_num, sp); + sp-=2; + /* sp is the index, sp+1 is the array ref and sp+2 is rvalue expr result; after this sp+2 is the array val */ + if (!mawk_array_pure(MAWK, sp+1, 1)) { + mawk_array_getnum_execute(MAWK, sp+2, sp+1, sp); + sp[2].d.dval = mawk_num_pow(sp[2].d.dval, tmp_num); + mawk_array_set_execute(MAWK, sp, sp+1, sp, sp+2); + } + else { + mawk_array_getptr_execute(MAWK, cp, sp+1, sp); + cp->d.dval = mawk_num_pow(cp->d.dval, tmp_num); + } + break; + + + case F_ADD_ASG: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + cp = (mawk_cell_t *) (sp - 1)->ptr; + mawk_cellcpy(MAWK, &MAWK->tc, cp); + mawk_cast1_to_num(MAWK, &MAWK->tc); + 
P_nansafe1(MAWK->tc.d.dval, (MAWK->tc.d.dval + sp->d.dval), sp->d.dval); + sp--; + sp->type = C_NUM; + sp->d.dval = MAWK->tc.d.dval; + mawk_field_assign(MAWK, cp, &MAWK->tc); + break; + + case F_SUB_ASG: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + cp = (mawk_cell_t *) (sp - 1)->ptr; + mawk_cellcpy(MAWK, &MAWK->tc, cp); + mawk_cast1_to_num(MAWK, &MAWK->tc); + P_nansafe1(MAWK->tc.d.dval, (MAWK->tc.d.dval - sp->d.dval), sp->d.dval); + sp--; + sp->type = C_NUM; + sp->d.dval = MAWK->tc.d.dval; + mawk_field_assign(MAWK, cp, &MAWK->tc); + break; + + case F_MUL_ASG: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + cp = (mawk_cell_t *) (sp - 1)->ptr; + mawk_cellcpy(MAWK, &MAWK->tc, cp); + mawk_cast1_to_num(MAWK, &MAWK->tc); + + P_nansafe1(MAWK->tc.d.dval, (MAWK->tc.d.dval * sp->d.dval), sp->d.dval); + sp--; + + sp->type = C_NUM; + sp->d.dval = MAWK->tc.d.dval; + mawk_field_assign(MAWK, cp, &MAWK->tc); + break; + + case F_DIV_ASG: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + cp = (mawk_cell_t *) (sp - 1)->ptr; + mawk_cellcpy(MAWK, &MAWK->tc, cp); + mawk_cast1_to_num(MAWK, &MAWK->tc); + +#ifdef MAWK_NO_FLOAT + { + mawk_num_t d; + d = sp--->d.dval; + if (d != MAWK_NUM_ZERO) + MAWK->tc.d.dval /= d; + else + MAWK->tc.d.dval = P_nan(); + } +#else + P_nansafe1(MAWK->tc.d.dval, (MAWK->tc.d.dval / sp->d.dval), sp->d.dval); + sp--; +#endif + sp->type = C_NUM; + sp->d.dval = MAWK->tc.d.dval; + mawk_field_assign(MAWK, cp, &MAWK->tc); + break; + + case F_MOD_ASG: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + cp = (mawk_cell_t *) (sp - 1)->ptr; + mawk_cellcpy(MAWK, &MAWK->tc, cp); + mawk_cast1_to_num(MAWK, &MAWK->tc); + +#ifdef MAWK_NO_FLOAT + { + int d; + d = sp--->d.dval; + if (d != MAWK_NUM_ZERO) + MAWK->tc.d.dval %= d; + else + MAWK->tc.d.dval = P_nan(); + } +#else + MAWK->tc.d.dval = P_fmod(MAWK->tc.d.dval, sp--->d.dval); +#endif + sp->type = C_NUM; + sp->d.dval = MAWK->tc.d.dval; + mawk_field_assign(MAWK, cp, &MAWK->tc); + break; + + 
case F_POW_ASG: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + cp = (mawk_cell_t *) (sp - 1)->ptr; + mawk_cellcpy(MAWK, &MAWK->tc, cp); + mawk_cast1_to_num(MAWK, &MAWK->tc); + MAWK->tc.d.dval = mawk_num_pow(MAWK->tc.d.dval, sp--->d.dval); + sp->type = C_NUM; + sp->d.dval = MAWK->tc.d.dval; + mawk_field_assign(MAWK, cp, &MAWK->tc); + break; + + case _ADD: + sp--; + if (TEST2(sp) != TWO_NUMS) + mawk_cast2_to_num(MAWK, sp); + + P_nansafe1(sp[0].d.dval, (sp[0].d.dval + sp[1].d.dval), sp[1].d.dval); + break; + + case _SUB: + sp--; + if (TEST2(sp) != TWO_NUMS) + mawk_cast2_to_num(MAWK, sp); + + P_nansafe1(sp[0].d.dval, (sp[0].d.dval - sp[1].d.dval), sp[1].d.dval); + break; + + case _MUL: + sp--; + if (TEST2(sp) != TWO_NUMS) + mawk_cast2_to_num(MAWK, sp); + + P_nansafe1(sp[0].d.dval, (sp[0].d.dval * sp[1].d.dval), sp[1].d.dval); + break; + + case _DIV: + sp--; + if (TEST2(sp) != TWO_NUMS) + mawk_cast2_to_num(MAWK, sp); + +#ifdef MAWK_NO_FLOAT + if (sp[1].d.dval != 0) + sp[0].d.dval /= sp[1].d.dval; + else + sp[0].d.dval = P_nan(); +#else + P_nansafe1(sp[0].d.dval, (sp[0].d.dval / sp[1].d.dval), sp[1].d.dval); +#endif + break; + + case _MOD: + sp--; + if (TEST2(sp) != TWO_NUMS) + mawk_cast2_to_num(MAWK, sp); + +#ifdef MAWK_NO_FLOAT + { + int d; + d = sp[1].d.dval; + if (d != MAWK_NUM_ZERO) + sp[0].d.dval %= d; + else + sp[0].d.dval = P_nan(); + } +#else + sp[0].d.dval = P_fmod(sp[0].d.dval, sp[1].d.dval); +#endif + break; + + case _POW: + sp--; + if (TEST2(sp) != TWO_NUMS) + mawk_cast2_to_num(MAWK, sp); + sp[0].d.dval = mawk_num_pow(sp[0].d.dval, sp[1].d.dval); + break; + + case _NOT: + /* evaluates to 0.0 or 1.0 */ + reswitch_1: + switch (sp->type) { + case C_NOINIT: + sp->d.dval = MAWK_NUM_ONE; + break; + case C_NUM: + if (!P_isnan_manual(sp->d.dval)) + sp->d.dval = sp->d.dval != MAWK_NUM_ZERO ? MAWK_NUM_ZERO : MAWK_NUM_ONE; + break; + case C_STRING: + sp->d.dval = string(sp)->len ? 
MAWK_NUM_ZERO : MAWK_NUM_ONE; + free_STRING(string(sp)); + break; + case C_STRNUM: /* mawk_test as a number */ + sp->d.dval = sp->d.dval != MAWK_NUM_ZERO ? MAWK_NUM_ZERO : MAWK_NUM_ONE; + free_STRING(string(sp)); + break; + case C_MBSTRN: + mawk_check_strnum(MAWK, sp); + goto reswitch_1; + default: + mawk_bozo(MAWK, "bad type on eval stack"); + } + sp->type = C_NUM; + break; + + case _TEST: + /* evaluates to 0.0 or 1.0 */ + reswitch_2: + switch (sp->type) { + case C_NOINIT: + sp->d.dval = MAWK_NUM_ZERO; + break; + case C_NUM: + if (!P_isnan_manual(sp->d.dval)) + sp->d.dval = sp->d.dval != MAWK_NUM_ZERO ? MAWK_NUM_ONE : MAWK_NUM_ZERO; + break; + case C_STRING: + sp->d.dval = string(sp)->len ? MAWK_NUM_ONE : MAWK_NUM_ZERO; + free_STRING(string(sp)); + break; + case C_STRNUM: /* mawk_test as a number */ + sp->d.dval = sp->d.dval != MAWK_NUM_ZERO ? MAWK_NUM_ONE : MAWK_NUM_ZERO; + free_STRING(string(sp)); + break; + case C_MBSTRN: + mawk_check_strnum(MAWK, sp); + goto reswitch_2; + default: + mawk_bozo(MAWK, "bad type on eval stack"); + } + sp->type = C_NUM; + break; + + case _UMINUS: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + if (!P_isnan_manual(sp->d.dval)) + sp->d.dval = -sp->d.dval; + break; + + case _UPLUS: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + break; + + case _CAT: + { + unsigned len1, len2; + char *str1, *str2; + mawk_string_t *b; + + sp--; + if (TEST2(sp) != TWO_STRINGS) + mawk_cast2_to_str(MAWK, sp); + str1 = string(sp)->str; + len1 = string(sp)->len; + str2 = string(sp + 1)->str; + len2 = string(sp + 1)->len; + + b = mawk_new_STRING0(MAWK, len1 + len2); + memcpy(b->str, str1, len1); + memcpy(b->str + len1, str2, len2); + free_STRING(string(sp)); + free_STRING(string(sp + 1)); + + sp->ptr = (PTR) b; + break; + } + + case _PUSHINT: + inc_sp(); + sp->type = cdp++->op; + break; + + case _BUILTIN: + db1printstack(MAWK, "--start before bi", sp, fp); + cp = (*(PF_CP) mawk_d2f(cdp++->ptr)) (MAWK, sp); + db1printstack(MAWK, "--start 
after bi", sp, fp); + if (cp->type == C_REQ_NOMORE) { + db1printf("->nomore\n"); + cdp-=2; + goto out_nomore; + } + sp = cp; + if (sp->type == C_REQ_CALL) { /* the bi function returned with a request to call an user function */ + int a_args; + mawk_cell_t *pfp; + FBLOCK *fbp; + + db1printf("->req call\n"); + + fbp = (FBLOCK *) sp->ptr; + sp--; + a_args = sp->d.dval; /* actual/caller's number of args on stack already */ + sp--; + + pfp = sp - a_args + 1; + { + mawk_call_pre(); + + db1printf("++ CALL2 sp=%d a_args(caller)=%d fbp->nargs(callee)=%d\n", stackptr(sp), a_args,fbp->nargs); + db1printstack(MAWK, "--start call2", sp, fp); + mawk_push_exe_state(EXEST_NORMAL); + aloop_state = NULL; + +/*sp - RECURSION_OVERHEAD - fbp->nargs + a_args*/ + fp = pfp; + db1printf("CALL2 fp=%d final-sp=%d\n", stackptr(fp), stackptr(sp)); + + cdp = fbp->code; + db1printf("CALL new=%d\n", stackptr(sp)); + } + } + break; + + case _PRINT: + sp = (*(PF_CP) mawk_d2f(cdp++->ptr)) (MAWK, sp); + break; + + case _POST_INC: + cp = (mawk_cell_t *) sp->ptr; + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + sp->type = C_NUM; + sp->d.dval = cp->d.dval; + if (!P_isnan_manual(cp->d.dval)) + cp->d.dval += MAWK_NUM_ONE; + break; + + case _POST_INC_ARR: + inc_sp(); + sp->type = C_NOINIT; + sp-=2; + /* sp is the index, sp+1 is the array ref and sp+2 is tmp; after this sp+2 is the array val */ + if (!mawk_array_pure(MAWK, sp+1, 1)) { + mawk_array_getnum_execute(MAWK, sp+2, sp+1, sp); + tmp_num = sp[2].d.dval; + if (!P_isnan_manual(sp[2].d.dval)) + sp[2].d.dval += MAWK_NUM_ONE; + mawk_array_set(MAWK, (mawk_array_t)sp[1].ptr, sp, sp+2); + + } + else { + mawk_array_getptr_execute(MAWK, cp, sp+1, sp); + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + tmp_num = cp->d.dval; + if (!P_isnan_manual(cp->d.dval)) + cp->d.dval += MAWK_NUM_ONE; + } + mawk_cell_destroy(MAWK, sp); + sp->type = C_NUM; + sp->d.dval = tmp_num; + break; + + + case _POST_DEC: + cp = (mawk_cell_t *) sp->ptr; + if (cp->type != 
C_NUM) + mawk_cast1_to_num(MAWK, cp); + sp->type = C_NUM; + sp->d.dval = cp->d.dval; + if (!P_isnan_manual(sp->d.dval)) + cp->d.dval -= MAWK_NUM_ONE; + break; + + case _POST_DEC_ARR: + inc_sp(); + sp->type = C_NOINIT; + sp-=2; + /* sp is the index, sp+1 is the array ref and sp+2 is tmp; after this sp+2 is the array val */ + if (!mawk_array_pure(MAWK, sp+1, 1)) { + mawk_array_getnum_execute(MAWK, sp+2, sp+1, sp); + tmp_num = sp[2].d.dval; + if (!P_isnan_manual(sp[2].d.dval)) + sp[2].d.dval -= MAWK_NUM_ONE; + mawk_array_set(MAWK, (mawk_array_t)sp[1].ptr, sp, sp+2); + } + else { + mawk_array_getptr_execute(MAWK, cp, sp+1, sp); + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + tmp_num = cp->d.dval; + if (!P_isnan_manual(cp->d.dval)) + cp->d.dval -= MAWK_NUM_ONE; + } + mawk_cell_destroy(MAWK, sp); + sp->type = C_NUM; + sp->d.dval = tmp_num; + break; + + case _PRE_INC: + cp = (mawk_cell_t *) sp->ptr; + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + if (!P_isnan_manual(sp->d.dval)) + cp->d.dval += MAWK_NUM_ONE; + sp->d.dval = cp->d.dval; + sp->type = C_NUM; + break; + + case _PRE_INC_ARR: + inc_sp(); + sp->type = C_NOINIT; + sp-=2; + /* sp is the index, sp+1 is the array ref and sp+2 is tmp; after this sp+2 is the array val */ + if (!mawk_array_pure(MAWK, sp+1, 1)) { + mawk_array_getnum_execute(MAWK, sp+2, sp+1, sp); + if (!P_isnan_manual(sp[2].d.dval)) + sp[2].d.dval += MAWK_NUM_ONE; + mawk_array_set(MAWK, (mawk_array_t)sp[1].ptr, sp, sp+2); + mawk_cell_destroy(MAWK, sp); + sp->type = C_NUM; + sp->d.dval = sp[2].d.dval; + } + else { + mawk_array_getptr_execute(MAWK, cp, sp+1, sp); + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + if (!P_isnan_manual(cp->d.dval)) + cp->d.dval += MAWK_NUM_ONE; + mawk_cell_destroy(MAWK, sp); + sp->type = C_NUM; + sp->d.dval = cp->d.dval; + } + break; + + + case _PRE_DEC: + cp = (mawk_cell_t *) sp->ptr; + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + if (!P_isnan_manual(sp->d.dval)) + cp->d.dval -= 
MAWK_NUM_ONE; + sp->d.dval = cp->d.dval; + sp->type = C_NUM; + break; + + case _PRE_DEC_ARR: + inc_sp(); + sp->type = C_NOINIT; + sp-=2; + /* sp is the index, sp+1 is the array ref and sp+2 is tmp; after this sp+2 is the array val */ + if (!mawk_array_pure(MAWK, sp+1, 1)) { + mawk_array_getnum_execute(MAWK, sp+2, sp+1, sp); + if (!P_isnan_manual(sp[2].d.dval)) + sp[2].d.dval -= MAWK_NUM_ONE; + mawk_array_set(MAWK, (mawk_array_t)sp[1].ptr, sp, sp+2); + mawk_cell_destroy(MAWK, sp); + sp->type = C_NUM; + sp->d.dval = sp[2].d.dval; + } + else { + mawk_array_getptr_execute(MAWK, cp, sp+1, sp); + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + if (!P_isnan_manual(cp->d.dval)) + cp->d.dval -= MAWK_NUM_ONE; + mawk_cell_destroy(MAWK, sp); + sp->type = C_NUM; + sp->d.dval = cp->d.dval; + } + break; + + case F_POST_INC: + cp = (mawk_cell_t *) sp->ptr; + mawk_cellcpy(MAWK, &MAWK->tc, cp); + mawk_cast1_to_num(MAWK, &MAWK->tc); + sp->type = C_NUM; + sp->d.dval = MAWK->tc.d.dval; + if (!P_isnan_manual(MAWK->tc.d.dval)) + MAWK->tc.d.dval += MAWK_NUM_ONE; + mawk_field_assign(MAWK, cp, &MAWK->tc); + break; + + case F_POST_DEC: + cp = (mawk_cell_t *) sp->ptr; + mawk_cellcpy(MAWK, &MAWK->tc, cp); + mawk_cast1_to_num(MAWK, &MAWK->tc); + sp->type = C_NUM; + sp->d.dval = MAWK->tc.d.dval; + if (!P_isnan_manual(MAWK->tc.d.dval)) + MAWK->tc.d.dval -= MAWK_NUM_ONE; + mawk_field_assign(MAWK, cp, &MAWK->tc); + break; + + case F_PRE_INC: + cp = (mawk_cell_t *) sp->ptr; + mawk_cellcpy(MAWK, sp, cp); + mawk_cast1_to_num(MAWK, sp); + if (!P_isnan_manual(sp->d.dval)) + sp->d.dval += MAWK_NUM_ONE; + mawk_field_assign(MAWK, cp, sp); + break; + + case F_PRE_DEC: + cp = (mawk_cell_t *) sp->ptr; + mawk_cellcpy(MAWK, sp, cp); + mawk_cast1_to_num(MAWK, sp); + if (!P_isnan_manual(sp->d.dval)) + sp->d.dval -= MAWK_NUM_ONE; + mawk_field_assign(MAWK, cp, sp); + break; + + case _JMP: + cdp += cdp->op; + break; + + case _JNZ: + /* jmp if top of stack is non-zero and pop stack */ + if 
(P_isnan(sp->d.dval)) + mawk_rt_error(MAWK, "NaN in conditional jump"); + if (mawk_test(MAWK, sp)) + cdp += cdp->op; + else + cdp++; + mawk_cell_destroy(MAWK, sp); + sp--; + break; + + case _JZ: + /* jmp if top of stack is zero and pop stack */ + if (P_isnan(sp->d.dval)) + mawk_rt_error(MAWK, "NaN in conditional jump"); + if (!mawk_test(MAWK, sp)) + cdp += cdp->op; + else + cdp++; + mawk_cell_destroy(MAWK, sp); + sp--; + break; + + case _LJZ: + /* special jump for logical and */ + /* this is always preceded by _TEST */ + if (P_isnan(sp->d.dval)) + mawk_rt_error(MAWK, "NaN in conditional jump"); + if (sp->d.dval == MAWK_NUM_ZERO) { + /* take jump, but don't pop stack */ + cdp += cdp->op; + } + else { + /* pop and don't jump */ + sp--; + cdp++; + } + break; + + case _LJNZ: + /* special jump for logical or */ + /* this is always preceded by _TEST */ + if (P_isnan(sp->d.dval)) + mawk_rt_error(MAWK, "NaN in conditional jump"); + if (sp->d.dval != MAWK_NUM_ZERO) { + /* take jump, but don't pop stack */ + cdp += cdp->op; + } + else { + /* pop and don't jump */ + sp--; + cdp++; + } + break; + + /* the relation operations */ + /* compare() makes sure string ref counts are OK */ + case _EQ: + dt = compare(MAWK, --sp); + sp->type = C_NUM; + sp->d.dval = P_nansafe_exp1((dt == 0 ? MAWK_NUM_ONE : MAWK_NUM_ZERO), dt); + break; + + case _NEQ: + dt = compare(MAWK, --sp); + sp->type = C_NUM; + sp->d.dval = P_nansafe_exp1((dt ? MAWK_NUM_ONE : MAWK_NUM_ZERO), dt); + break; + + case _LT: + dt = compare(MAWK, --sp); + sp->type = C_NUM; + sp->d.dval = P_nansafe_exp1((dt < 0 ? MAWK_NUM_ONE : MAWK_NUM_ZERO), dt); + break; + + case _LTE: + dt = compare(MAWK, --sp); + sp->type = C_NUM; + sp->d.dval = P_nansafe_exp1((dt <= 0 ? MAWK_NUM_ONE : MAWK_NUM_ZERO), dt); + break; + + case _GT: + dt = compare(MAWK, --sp); + sp->type = C_NUM; + sp->d.dval = P_nansafe_exp1((dt > 0 ? 
MAWK_NUM_ONE : MAWK_NUM_ZERO), dt); + break; + + case _GTE: + dt = compare(MAWK, --sp); + sp->type = C_NUM; + sp->d.dval = P_nansafe_exp1((dt >= 0 ? MAWK_NUM_ONE : MAWK_NUM_ZERO), dt); + break; + + case _MATCH0: + /* does $0 match, the RE at cdp? */ + + inc_sp(); + if (MAWK->field->type >= C_STRING) { + sp->type = C_NUM; + sp->d.dval = mawk_REtest(MAWK, string(MAWK->field)->str, cdp++->ptr) + ? MAWK_NUM_ONE : MAWK_NUM_ZERO; + + break /* the case */ ; + } + else { + mawk_cellcpy(MAWK, sp, MAWK->field); + /* and FALL THRU */ + } + + case _MATCH1: + /* does expr at sp[0] match RE at cdp */ + if (sp->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + t = mawk_REtest(MAWK, string(sp)->str, cdp++->ptr); + free_STRING(string(sp)); + sp->type = C_NUM; + sp->d.dval = t ? MAWK_NUM_ONE : MAWK_NUM_ZERO; + break; + + + case _MATCH2: + /* does sp[-1] match sp[0] as re */ + mawk_cast_to_RE(MAWK, sp); + + if ((--sp)->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + t = mawk_REtest(MAWK, string(sp)->str, (sp + 1)->ptr); + + free_STRING(string(sp)); + sp->type = C_NUM; + sp->d.dval = t ? MAWK_NUM_ONE : MAWK_NUM_ZERO; + break; + + case A_TEST: + /* entry : sp[0].ptr-> an array + sp[-1] is an expression + + we compute (expression in array) */ + sp--; + t = mawk_array_find(MAWK, (mawk_array_t)((sp + 1)->ptr), sp, NULL, NO_MAWK_CREATE); + mawk_cell_destroy(MAWK, sp); + sp->type = C_NUM; + sp->d.dval = t ? 
MAWK_NUM_ONE : MAWK_NUM_ZERO; + break; + + case A_DEL: + /* sp[0].ptr -> array + sp[-1] is an expr + delete array[expr] */ + + mawk_array_delete(MAWK, (mawk_array_t)(sp->ptr), sp - 1); + mawk_cell_destroy(MAWK, sp - 1); + sp -= 2; + break; + + case DEL_A: + /* free all the array at once */ + mawk_array_clear(MAWK, (mawk_array_t)(sp->ptr)); + sp--; + break; + + /* form a multiple array index */ + case A_CAT: + sp = mawk_array_cat(MAWK, sp, cdp->op); + cdp++; + break; + + case _EXIT: + if (sp->type != C_NUM) + mawk_cast1_to_num(MAWK, sp); + MAWK->exit_code = d_to_i(sp->d.dval); + sp--; + /* fall thru */ + + case _EXIT0: + + if (!MAWK->end_start) { + mawk_exit_(MAWK, MAWK->exit_code); + goto out_exit; + } + + + cdp = MAWK->end_start; + MAWK->end_start = NULL; /* makes sure next exit exits */ + + if (MAWK->begin_start) { + mawk_zfree(MAWK, MAWK->begin_start, MAWK->begin_size); + MAWK->begin_start = NULL; + } + if (MAWK->main_start) { + mawk_zfree(MAWK, MAWK->main_start, MAWK->main_size); + MAWK->main_start = NULL; + } + sp = MAWK->eval_stack - 1; /* might be in user function */ + CLEAR_ALOOP_STACK(); /* ditto */ + + /* cdp is set up to point to END, go on executing that */ + break; + case _JMAIN: /* go from BEGIN code to MAIN code */ + mawk_zfree(MAWK, MAWK->begin_start, MAWK->begin_size); + MAWK->begin_start = NULL; + cdp = MAWK->main_start; + if (MAWK->separate_begin) + goto out_sepmain; + break; + + case _OMAIN: + if (!MAWK->main_input) + mawk_FINopen_main(MAWK); + if (!MAWK->main_input) + goto jump_end; + MAWK->restart_label = cdp; + cdp = MAWK->next_label; + break; + + case _NEXT: + /* next might be inside an aloop -- clear stack */ + CLEAR_ALOOP_STACK(); + cdp = MAWK->next_label; + break; + + case OL_GL: + { + char *p; + unsigned len; + + p = mawk_FINgets(MAWK, MAWK->main_input, &len); + if (p == (void *) mawk_FIN_nomore) { + cdp--; + goto out_nomore; + } + if (!p) { + if (!MAWK->end_start) + mawk_exitval(MAWK, 0, MAWK_EXER_DONE); + +jump_end:; + cdp = 
MAWK->end_start; + if (MAWK->main_start != NULL) + mawk_zfree(MAWK, MAWK->main_start, MAWK->main_size); + MAWK->main_start = MAWK->end_start = (INST *) 0; + } + else { + mawk_set_field0(MAWK, p, len); + cdp = MAWK->restart_label; + MAWK->rt_nr++; + MAWK->rt_fnr++; + } + } + break; + + /* two kinds of OL_GL is a historical stupidity from working on + a machine with very slow floating point emulation */ + case OL_GL_NR: + { + char *p; + unsigned len; + p = mawk_FINgets(MAWK, MAWK->main_input, &len); + if (p == (void *) mawk_FIN_nomore) { + cdp--; + goto out_nomore; + } + if (!p) { + if (!MAWK->end_start) + mawk_exitval(MAWK, 0, MAWK_EXER_DONE); + + cdp = MAWK->end_start; + if (MAWK->main_start != NULL) + mawk_zfree(MAWK, MAWK->main_start, MAWK->main_size); + MAWK->main_start = MAWK->end_start = NULL; + } + else { + mawk_set_field0(MAWK, p, len); + cdp = MAWK->restart_label; + + if (TEST2(NR) != TWO_NUMS) { + mawk_cast2_to_num(MAWK, NR); + } + + NR->d.dval += MAWK_NUM_ONE; + MAWK->rt_nr++; + FNR->d.dval += MAWK_NUM_ONE; + MAWK->rt_fnr++; + } + } + break; + + case _RANGE_CHK: +/* mawk_test a range pattern: pat1, pat2 { action } + entry : + cdp[0].op -- a flag, mawk_test pat1 if on else pat2 + cdp[1].op -- offset of pat2 code from cdp + cdp[2].op -- offset of action code from cdp + cdp[3].op -- offset of code after the action from cdp + cdp[4] -- start of pat1 code +*/ +#define FLAG cdp[0].op +#define PAT2 cdp[1].op +#define ACTION cdp[2].op +#define FOLLOW cdp[3].op +#define PAT1 4 + + db1printstack(MAWK, "--start range", sp, fp); + + /* FLAG: 0 means we are running the patten (we are between pat1 and pat2) */ + if (FLAG) { /* mawk_test again pat1 */ + /* prepare for executing pattern match for pat1 */ + mawk_push_exe_state(EXEST_RANGE1); + cdp = cdp + PAT1; + db1printf("matching pat1\n"); + break; + } + + range_chk_pat2:; + /* mawk_test against pat2 */ + mawk_push_exe_state(EXEST_RANGE2); + cdp = cdp + PAT2; + db1printf("matching pat2\n"); + break; + + case 
_RANGE_STOP: /* only for range patterns */ + cp = sp; /* remember the result of the pattern match expr */ + db1printstack(MAWK, "--stop cp", sp, fp); + sp--; + mawk_pop_exe_state(); + if (exest == EXEST_RANGE1) { + t = mawk_test(MAWK, cp); + mawk_cell_destroy(MAWK, cp); + if (t) { + FLAG = 0; + goto range_chk_pat2; + } + else { + cdp += FOLLOW; + break; /* break the switch */ + } + } + else if (exest == EXEST_RANGE2) { + /* pat2 and then perform the action */ + FLAG = mawk_test(MAWK, cp); + mawk_cell_destroy(MAWK, cp); + cdp += ACTION; + } + else + mawk_bozo(MAWK, "wrong execution state when popping range stop frame"); + exest = EXEST_NORMAL; + break; + +/* function calls */ + + case _RET0: +db1printf("RET0 on sp=%d\n", stackptr(sp)); + inc_sp(); + sp->type = C_NOINIT; + /* fall thru */ + + case _RET: + +#ifdef DEBUG + if (sp != entry_sp + 1) + mawk_bozo(MAWK, "ret"); +#endif + + db1printstack(MAWK, "-- ret entry", sp, fp); + + { + mawk_cell_t retval; + mawk_cell_t *pfp = fp; + FBLOCK *cfbp; + int cargs; + + /* have to save retval in case the stack got relocated */ + mawk_cellcpy(MAWK, &retval, sp); + mawk_cell_destroy(MAWK, sp); + sp--; + db1printf("RET before removing overhead (retval already removed) sp=%d\n", stackptr(sp)); + db1printstack(MAWK, "-- ret before overhead", sp, fp); + + if (old_stack_base) { /* reset stack */ + /* move the return value */ + mawk_cellcpy(MAWK, old_sp + 1, &retval); + mawk_cell_destroy(MAWK, &retval); + + /* restore */ + mawk_zfree(MAWK, MAWK->stack_base, sizeof(mawk_cell_t) * EVAL_STACK_SIZE); + MAWK->stack_base = old_stack_base; + MAWK->stack_danger = old_stack_base + DANGER; + sp = old_sp; + } + + /* return might be inside an aloop -- clear stack */ + CLEAR_ALOOP_STACK(); + + cfbp = call_fbp; + cargs = call_a_args; + + /* get back at the caller context */ + mawk_pop_exe_state(); + db1printf("RET after removing overhead sp=%d pfp=%d\n", stackptr(sp), stackptr(pfp)); + + if (cfbp != NULL) { + db1printf("removing %d locals\n", 
pfp-sp+1); + mawk_call_post(cfbp, cargs); + } + sp = pfp; + + /* save retval to current sp (which might be in an older stack block) */ + mawk_cellcpy(MAWK, sp, &retval); + mawk_cell_destroy(MAWK, &retval); + + /* caller was not plain awk code, has to return immediately after the func */ + if (exest == EXEST_EXIT) + goto out_funcret; + + /* go on executing normally; we are back at the caller's context and + sp contains the return value */ + } + break; + + case _CALL: + { + /* cdp[0] holds ptr to "function block" + cdp[1] holds number of input arguments + */ + mawk_cell_t *pfp; + FBLOCK *fbp = (FBLOCK *) cdp++->ptr; + int a_args = cdp++->op; /* actual/caller's number of args already pushed on stack */ + + /* might be just a C function */ + cp = mawk_call_c_func(MAWK, sp, fbp, a_args); + if (cp != NULL) { + /* C function call succeeded, new sp is returned by the user function */ + sp = cp; + break; + } + + /* native awk user function */ + pfp = sp - a_args + 1; + { + mawk_call_pre(); + + db1printf("++ CALL sp=%d a_args(caller)=%d fbp->nargs(callee)=%d\n", stackptr(sp), a_args,fbp->nargs); + mawk_push_exe_state(EXEST_NORMAL); + aloop_state = NULL; + +/*sp - RECURSION_OVERHEAD - fbp->nargs + a_args*/ + fp = pfp; + db1printf("CALL fp=%d final-sp=%d\n", stackptr(fp), stackptr(sp)); + + call_fbp = fbp; + call_a_args = a_args; + cdp = fbp->code; + db1printf("CALL new=%d\n", stackptr(sp)); + } + } + goto call_entry; + case LOCATION: + mawk_location_change(MAWK, cdp->op); + cdp++; + break; + default: + mawk_bozo(MAWK, "bad opcode"); + } + } /* while */ + + /* got here because exit */ + out:; + MAWK->sp = sp; + db1printf("exe out sp=%d\n", stackptr(sp)); + return MAWK_EXER_DONE; + + /* jump here to indicate return from a function; top of stack is the retval */ + out_funcret:; + MAWK->sp = sp; + db1printf("exe out_funcret sp=%d\n", stackptr(sp)); + return MAWK_EXER_FUNCRET; + + /* jump here to indicate execution interrupted due to read block (no more input) */ + out_nomore:; + 
mawk_push_exe_state(EXEST_NORMAL); /* for a later resume */ + MAWK->sp = sp; + db1printf("exe out_nomore sp=%d\n", stackptr(sp)); + return MAWK_EXER_INT_READ; + + /* jump here to indicate execution interrupted due to read block (no more input) */ + out_runlimit:; + mawk_push_exe_state(EXEST_NORMAL); /* for a later resume */ + MAWK->sp = sp; + db1printf("exe out_runlimit sp=%d\n", stackptr(sp)); + db1printstack(MAWK, "-- runlimit push", sp, fp); + return MAWK_EXER_INT_RUNLIMIT; + + /* jump here to interrupt execution before running main */ + out_sepmain:; + mawk_push_exe_state(EXEST_NORMAL); /* for a later resume */ + MAWK->sp = sp; + db1printf("exe separate main sp=%d\n", stackptr(sp)); + db1printstack(MAWK, "-- sepmain push", sp, fp); + return MAWK_EXER_INT_SEPMAIN; + + + /* jump here for hard exit; discards the whole stack! */ + out_exit:; + /* if we hit exit, we sure won't need the eval stack anymore */ + MAWK->sp = MAWK->eval_stack; + db1printf("exe out_exit sp=%d\n", stackptr(MAWK->sp)); + return MAWK_EXER_EXIT; +} + +/* entry point: start executing cdp (BEGIN, END or main) */ +void mawk_execute(mawk_state_t *MAWK, register INST *cdp, register mawk_cell_t *sp, mawk_cell_t *fp) +{ + mawk_cell_t *old_stack_base = 0, *old_sp = 0; /* for moving the eval stack on deep recursion */ + ALOOP_STATE *aloop_state = (ALOOP_STATE *) 0; + FBLOCK *call_fbp = NULL; /* user function being executed */ + int call_a_args = 0; /* number of caller args in user func being executed */ + + db1printf("=== mawk_execute()\n"); + mawk_push_exe_state(EXEST_EXIT); /* this will get execute to exit at the end */ + mawk_push_exe_state(EXEST_NORMAL); + MAWK->sp = sp; + mawk_execute_(MAWK); +} + +mawk_exec_result_t mawk_resume(mawk_state_t *MAWK) +{ + db1printf("=== mawk_resume()\n"); + if (MAWK->eval_stack == MAWK->sp) + return MAWK_EXER_ERR_NOSTACK; + return mawk_execute_(MAWK); +} + +/* entry point: Call awk function fbp with a_args already pushed on + the stack. 
sp is the stack pointer that is returned after the operation.
+
+  This function is called only from outside of execute.c, vm func calls
+  are embedded in execute_().
+
+  Flow, as visible in the body below:
+   - mawk_call_c_func() is tried first; a non-NULL result becomes MAWK->sp
+     and the call is reported as MAWK_EXER_FUNCRET.
+   - otherwise EXEST_EXIT and then EXEST_NORMAL are pushed and
+     mawk_execute_() runs; its result is returned unchanged.
+   - only on MAWK_EXER_FUNCRET is the top-of-stack cell copied into *res,
+     destroyed and popped; for any other result *res is not written.
+
+  NOTE(review): mawk_call_vars, mawk_call_pre() and mawk_push_exe_state()
+  are defined outside this chunk. Several locals look unused and
+  "MAWK->sp = sp" is repeated; presumably those macros use the locals and
+  move sp - confirm before removing anything.
+*/
+mawk_exec_result_t mawk_call(mawk_state_t * MAWK, FBLOCK * fbp, int a_args, mawk_cell_t *res)
+{
+	mawk_exec_result_t exer;
+	mawk_cell_t *old_stack_base = 0, *old_sp = 0;
+	ALOOP_STATE *aloop_state = (ALOOP_STATE *) 0;
+	mawk_cell_t *sp = MAWK->sp;
+	mawk_call_vars;
+	mawk_cell_t *cp, *fp = sp - a_args+1;
+	INST *cdp = fbp->code;
+	FBLOCK *call_fbp = NULL;  /* user function being executed */
+	int call_a_args = 0;  /* number of caller args in user func being executed */
+
+	db1printstack(MAWK, "=== mawk_call_do()\n", sp, fp);
+
+	/* might be just a C function */
+	cp = mawk_call_c_func(MAWK, sp, fbp, a_args);
+	if (cp != NULL) {
+		MAWK->sp = cp;
+		exer = MAWK_EXER_FUNCRET;
+		goto copy_retv;
+	}
+
+	{
+		mawk_call_pre();
+
+		MAWK->sp = sp;
+		db1printstack(MAWK, "mawk_call_do2()\n", sp, fp);
+		mawk_push_exe_state(EXEST_EXIT);
+		MAWK->sp = sp;
+		db1printstack(MAWK, "mawk_call_do3()\n", sp, fp);
+		mawk_push_exe_state(EXEST_NORMAL); /* to get mawk_execute_ to run our code */
+		MAWK->sp = sp;
+		db1printstack(MAWK, "mawk_call_do4()\n", sp, fp);
+		MAWK->sp = sp;
+		exer = mawk_execute_(MAWK);
+		db1printstack(MAWK, "mawk_call_AFTER()\n", MAWK->sp, fp);
+	}
+
+	copy_retv:;
+	if (exer == MAWK_EXER_FUNCRET) {
+		mawk_cellcpy(MAWK, res, MAWK->sp);
+		mawk_cell_destroy(MAWK, MAWK->sp);
+		MAWK->sp--;
+	}
+
+	return exer;
+}
+
+
+/*
+  return 0 if a numeric is zero else return non-zero
+  return 0 if a string is "" else return non-zero
+
+  C_NOINIT tests as 0. A C_MBSTRN cell is handed to mawk_check_strnum()
+  and the switch is run again on the same cell (presumably retyped by that
+  call - confirm). Any other cell type is reported through mawk_bozo().
+  For C_STRING the value returned is the string length itself.
+*/
+int mawk_test(mawk_state_t *MAWK, register mawk_cell_t *cp)
+{
+reswitch:
+
+	switch (cp->type) {
+	case C_NOINIT:
+		return 0;
+	case C_STRNUM: /* mawk_test as a number */
+	case C_NUM:
+		return cp->d.dval != MAWK_NUM_ZERO;
+	case C_STRING:
+		return string(cp)->len;
+	case C_MBSTRN:
+		mawk_check_strnum(MAWK, cp);
+		goto reswitch;
+	default:
+		mawk_bozo(MAWK, "bad cell type in call to mawk_test");
+	}
+	return 0; /* can't get here: keeps the compiler quiet */
+}
+
+/* compare cells at cp and cp+1 and
+   frees STRINGs at those cells
+
+   Numeric pairs yield 1, -1 or 0, or P_nan() when P_isnan_manual() flags
+   either operand. String pairs yield the strcmp() result converted to
+   double; the two STRINGs are released only on this path.
+   Mixed pairs are first converted with mawk_cast2_to_num() or
+   mawk_cast2_to_str() and then jump to the matching label; pairs that
+   contain a C_MBSTRN go through mawk_check_strnum() and the switch is
+   retried.
+*/
+static double compare(mawk_state_t *MAWK, register mawk_cell_t *cp)
+{
+	int k;
+
+reswitch:
+
+	switch (TEST2(cp)) {
+	case TWO_NOINITS:
+		return 0;
+
+	case TWO_NUMS:
+		two_d:
+		if (P_isnan_manual((cp + 1)->d.dval))
+			return P_nan();
+		if (P_isnan_manual(cp->d.dval))
+			return P_nan();
+		return cp->d.dval > (cp + 1)->d.dval ? 1 : cp->d.dval < (cp + 1)->d.dval ? -1 : 0;
+
+	case TWO_STRINGS:
+	case STRING_AND_STRNUM:
+		two_s:
+		k = strcmp(string(cp)->str, string(cp + 1)->str);
+		free_STRING(string(cp));
+		free_STRING(string(cp + 1));
+		return (double)k;
+
+	case NOINIT_AND_NUM:
+	case NOINIT_AND_STRNUM:
+	case NUM_AND_STRNUM:
+	case TWO_STRNUMS:
+		mawk_cast2_to_num(MAWK, cp);
+		goto two_d;
+	case NOINIT_AND_STRING:
+	case NUM_AND_STRING:
+		mawk_cast2_to_str(MAWK, cp);
+		goto two_s;
+	case TWO_MBSTRNS:
+		mawk_check_strnum(MAWK, cp);
+		mawk_check_strnum(MAWK, cp + 1);
+		goto reswitch;
+
+	case NOINIT_AND_MBSTRN:
+	case NUM_AND_MBSTRN:
+	case STRING_AND_MBSTRN:
+	case STRNUM_AND_MBSTRN:
+		/* exactly one side is the C_MBSTRN here: resolve that one */
+		mawk_check_strnum(MAWK, cp->type == C_MBSTRN ? cp : cp + 1);
+		goto reswitch;
+
+	default: /* there are no default cases */
+		mawk_bozo(MAWK, "bad cell type passed to compare");
+	}
+	return 0; /* shut up */
+}
+
+/* convert a number d to a field index $d -> $i
+   d above MAX_FIELD is reported through mawk_rt_overflow(); a value that
+   is not >= 0 (negative, or NaN since it fails both comparisons) is
+   reported through mawk_rt_error(). */
+static int d_to_index(mawk_state_t *MAWK, mawk_num_t d)
+{
+
+	if (d > MAX_FIELD)
+		mawk_rt_overflow(MAWK, "maximum number of fields", MAX_FIELD);
+
+	if (d >= MAWK_NUM_ZERO)
+		return (int) d;
+
+	/* might include nan */
+	mawk_rt_error(MAWK, "negative field index $%.6g", d);
+	return 0; /* shutup */
+}
+
+void mawk_dummy_execute_func(void) { (void)mawk_f2d(NULL); } /* suppress compiler warning on unused func */
diff --git a/src/libmawk/execute.h b/src/libmawk/execute.h
new file mode 100644
index 0000000..883187e
--- /dev/null
+++ b/src/libmawk/execute.h
@@ -0,0 +1,21 @@
+#ifndef MAWK_EXECUTE_H
+#define MAWK_EXECUTE_H
+
+typedef enum mawk_exec_result_e {
+	MAWK_EXER_FUNCRET,       /* function return, return value is on top of the stack */
+	MAWK_EXER_DONE,          /* done with BEGIN or END */
+	MAWK_EXER_EXIT,          /* script ran exit */
+	MAWK_EXER_INT_READ,      /* was running BEGIN, END, main or function, got blocked in a read */
+	MAWK_EXER_INT_RUNLIMIT,  /* was running BEGIN, END, main or function, reached run limit */
+	MAWK_EXER_INT_SEPMAIN,   /* about to jump to main but separate execution of main is configured */
+	MAWK_EXER_ERR_NOSTACK    /* can't resume: nothing's on the stack */
+} mawk_exec_result_t;
+
+
+/* resume execution of an interrupted script */
+mawk_exec_result_t mawk_resume(mawk_state_t *MAWK);
+
+mawk_exec_result_t mawk_call(mawk_state_t * MAWK, FBLOCK * fbp, int a_args, mawk_cell_t *res);
+int mawk_test(mawk_state_t *MAWK, register mawk_cell_t *cp);
+
+#endif
diff --git a/src/libmawk/execute_debug.h b/src/libmawk/execute_debug.h
new file mode 100644
index 0000000..91bd171
--- /dev/null
+++ b/src/libmawk/execute_debug.h
@@ -0,0 +1,49 @@
+/* this is not a real header and is included in the middle of execute.c to
+   provide real or dummy call debug 
functionality */ +/*#define CALLDEBUG*/ + +#ifdef CALLDEBUG +/* real prints */ +#include +#define db1printf(x...) fprintf(stderr, x) +#define stackptr(p) ((p) == NULL ? -1 : (p)-MAWK->eval_stack) +extern void mawk_print_cell(mawk_state_t *, mawk_cell_t *, FILE_NODE *); +static void db1printstack(mawk_state_t *MAWK, char *ann, mawk_cell_t *sp, mawk_cell_t *fp) +{ + mawk_cell_t *c; + db1printf("%s\n", ann); + for(c = sp; c >= MAWK->eval_stack; c--) { + if (fp == c) + db1printf("*"); + else + db1printf(" "); + db1printf("[%d] ", stackptr(c)); + if (c->type == C_EXE_STTYPE) { + db1printf("exest="); + switch(c->d.vcnt) { + case EXEST_NORMAL: db1printf("EXEST_NORMAL\n"); break; + case EXEST_EXIT: db1printf("EXEST_EXIT\n"); break; + case EXEST_RANGE1: db1printf("EXEST_RANGE1\n"); break; + case EXEST_RANGE2: db1printf("EXEST_RANGE2\n"); break; + default: db1printf("EXEST_%d\n", c->d.vcnt); break; + } + } + else if (c->type == C_EXE_STATE) + db1printf("state=%p\n", c->ptr); + else if (c->type == C_ARR_REF) + db1printf("date=arr_ref=%p\n", c->ptr); + else { + db1printf("data=%d=", c->type); + mawk_print_cell(MAWK, c, MAWK->fnode_stderr); + db1printf("\n"); + } + } +} +#else + +/* dummy prints */ +static void db1printf(const char *fmt, ...) { } +#define stackptr(p) 0 +#define db1printstack(mawk, ann, sp, fp) +#endif + diff --git a/src/libmawk/f2d.h b/src/libmawk/f2d.h new file mode 100644 index 0000000..13a95ee --- /dev/null +++ b/src/libmawk/f2d.h @@ -0,0 +1,41 @@ +/******************************************** +f2d.h + +libmawk changes (C) 2018, Tibor 'Igor2' Palinkas; + +This is a source file for libmawk, an implementation of +the AWK programming language. + +libmawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#ifndef MAWK_F2D_H +#define MAWK_F2D_H + +typedef void (mawk_generic_func)(); + +/* C89 doesn't allow function pointer vs. 
data pointer casts */ +union mawk_f2d_u { + void *data; + mawk_generic_func *func; +}; + +static void *mawk_f2d_(mawk_generic_func *func) +{ + union mawk_f2d_u tmp; + tmp.func = func; + return tmp.data; +} + +static mawk_generic_func *mawk_d2f_(void *data) +{ + union mawk_f2d_u tmp; + tmp.data = data; + return tmp.func; +} + +#define mawk_f2d(f) mawk_f2d_((mawk_generic_func *)(f)) +#define mawk_d2f(d) mawk_d2f_((void *)(d)) + +#endif diff --git a/src/libmawk/fcall.c b/src/libmawk/fcall.c new file mode 100644 index 0000000..53c0417 --- /dev/null +++ b/src/libmawk/fcall.c @@ -0,0 +1,340 @@ + +/******************************************** +fcall.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ +#include "mawk.h" +#include "symtype.h" +#include "code.h" + +/* This file has functions involved with type checking of + function calls +*/ + +static FCALL_REC *first_pass(mawk_state_t *, FCALL_REC *); +static CA_REC *call_arg_check(mawk_state_t *, FBLOCK *, CA_REC *, INST *, unsigned); +static int arg_cnt_ok(mawk_state_t *, FBLOCK *, CA_REC *, unsigned); +static void relocate_arglist(CA_REC *, int, unsigned, int); + +/* type checks a list of call arguments, + returns a list of arguments whose type is still unknown +*/ +static CA_REC *call_arg_check(mawk_state_t *MAWK, FBLOCK *callee, CA_REC *entry_list, INST *start, unsigned line_no) +{ + /* start -> to locate patch */ + /* line_no -> for error messages */ + register CA_REC *q; + CA_REC *exit_list = (CA_REC *) 0; + + MAWK->check_progress = 0; + + /* loop : + take q off entry_list + mawk_test it + if OK mawk_zfree(MAWK, q) else put on exit_list */ + while ((q = entry_list)) { + entry_list 
= q->link; + + if (q->type == ST_NONE) { + /* try to infer the type */ + /* it might now be in symbol table */ + if (q->sym_p->type == ST_VAR) { + /* set type and patch */ + q->type = CA_EXPR; + start[q->call_offset + 1].ptr = (PTR) q->sym_p->stval.cp; + } + else if (q->sym_p->type == ST_ARRAY) { + q->type = CA_ARRAY; + start[q->call_offset].op = A_PUSHA; + start[q->call_offset + 1].ptr = (PTR) q->sym_p->stval.array; + } + else { /* try to infer from callee */ + + switch (callee->typev[q->arg_num]) { + case ST_LOCAL_VAR: + q->type = CA_EXPR; + q->sym_p->type = ST_VAR; + q->sym_p->stval.cp = MAWK_ZMALLOC(MAWK, mawk_cell_t); + q->sym_p->stval.cp->type = C_NOINIT; + start[q->call_offset + 1].ptr = (PTR) q->sym_p->stval.cp; + break; + + case ST_LOCAL_ARRAY: + q->type = CA_ARRAY; + q->sym_p->type = ST_ARRAY; + q->sym_p->stval.array = mawk_array_new(MAWK, NULL); + start[q->call_offset].op = A_PUSHA; + start[q->call_offset + 1].ptr = (PTR) q->sym_p->stval.array; + break; + } + } + } + else if (q->type == ST_LOCAL_NONE) { + /* try to infer the type */ + if (*q->type_p == ST_LOCAL_VAR) { + /* set type , don't need to patch */ + q->type = CA_EXPR; + } + else if (*q->type_p == ST_LOCAL_ARRAY) { + q->type = CA_ARRAY; + start[q->call_offset].op = LA_PUSHA; + /* offset+1 op is OK */ + } + else { /* try to infer from callee */ + + switch (callee->typev[q->arg_num]) { + case ST_LOCAL_VAR: + q->type = CA_EXPR; + *q->type_p = ST_LOCAL_VAR; + /* do not need to patch */ + break; + + case ST_LOCAL_ARRAY: + q->type = CA_ARRAY; + *q->type_p = ST_LOCAL_ARRAY; + start[q->call_offset].op = LA_PUSHA; + break; + } + } + } + + /* if we still do not know the type put on the new list + else type check */ + if (q->type == ST_NONE || q->type == ST_LOCAL_NONE) { + q->link = exit_list; + exit_list = q; + } + else { /* type known */ + + if (callee->typev[q->arg_num] == ST_LOCAL_NONE) + callee->typev[q->arg_num] = q->type; + else if (q->type != callee->typev[q->arg_num]) + mawk_compile_error(MAWK, 
"type error in arg(%d) in call to %s", q->arg_num + 1, callee->name); + + MAWK_ZFREE(MAWK, q); + MAWK->check_progress = 1; + } + } /* while */ + + return exit_list; +} + + +static int arg_cnt_ok(mawk_state_t *MAWK, FBLOCK *fbp, CA_REC *q, unsigned line_no) +{ + if ((int) q->arg_num >= (int) fbp->nargs) + /* casts shutup stupid warning from solaris sun cc */ + { + mawk_compile_error(MAWK, "too many arguments in call to %s", fbp->name); + return 0; + } + else + return 1; +} + + /* function calls whose arg types need checking + are stored on this list */ + + +/* on first pass thru the resolve list + we check : + if forward referenced functions were really defined + if right number of arguments + and compute call_start which is now known +*/ + +static FCALL_REC *first_pass(mawk_state_t *MAWK, register FCALL_REC *p) +{ + FCALL_REC dummy; + register FCALL_REC *q = &dummy; /* trails p */ + + q->link = p; + while (p) { + if (!p->callee->code) { + /* callee never defined */ + if (!MAWK->suppress_undefined_function_warning) + mawk_compile_error(MAWK, "function %s never defined", p->callee->name); + /* delete p from list */ + q->link = p->link; + /* don't worry about freeing memory, we'll exit soon */ + } + /* note p->arg_list starts with last argument */ + else if (!p->arg_list /* nothing to do */ || + (!p->arg_cnt_checked && !arg_cnt_ok(MAWK, p->callee, p->arg_list, p->line_no))) { + q->link = p->link; /* delete p */ + /* the ! 
arg_list case is not an error so free memory */ + MAWK_ZFREE(MAWK, p); + } + else { + /* keep p and set call_start */ + q = p; + switch (p->call_scope) { + case SCOPE_MAIN: + p->call_start = MAWK->main_start; + break; + + case SCOPE_BEGIN: + p->call_start = MAWK->begin_start; + break; + + case SCOPE_END: + p->call_start = MAWK->end_start; + break; + + case SCOPE_FUNCT: + p->call_start = p->call->code; + break; + } + } + p = q->link; + } + return dummy.link; +} + +/* continuously walk the resolve_list making type deductions + until this list goes empty or no more progress can be made + (An example where no more progress can be made is at end of file +*/ + +void mawk_resolve_fcalls(mawk_state_t * MAWK) +{ + register FCALL_REC *p, *old_list, *new_list; + int progress; /* a flag */ + + old_list = first_pass(MAWK, MAWK->resolve_list); + new_list = (FCALL_REC *) 0; + progress = 0; + + while (1) { + if (!old_list) { + /* flop the lists */ + old_list = new_list; + if (!old_list /* nothing left */ + || !progress /* can't do any more */ ) + return; + + new_list = (FCALL_REC *) 0; + progress = 0; + } + + p = old_list; + old_list = p->link; + + if ((p->arg_list = call_arg_check(MAWK, p->callee, p->arg_list, p->call_start, p->line_no))) { + /* still have work to do , put on new_list */ + progress |= MAWK->check_progress; + p->link = new_list; + new_list = p; + } + else { + /* done with p */ + progress = 1; + MAWK_ZFREE(MAWK, p); + } + } +} + +/* the mawk_parser has just reduced a function call ; + the info needed to type check is passed in. If type checking + can not be done yet (most common reason -- function referenced + but not defined), a node is added to the resolve list. 
+*/ +void mawk_check_fcall(mawk_state_t *MAWK, FBLOCK *callee, int call_scope, int move_level, FBLOCK *call, CA_REC *arg_list, unsigned line_no) +{ + FCALL_REC *p; + + if (!callee->code) { + /* forward reference to a function to be defined later */ + p = MAWK_ZMALLOC(MAWK, FCALL_REC); + p->callee = callee; + p->call_scope = call_scope; + p->move_level = move_level; + p->call = call; + p->arg_list = arg_list; + p->arg_cnt_checked = 0; + p->line_no = line_no; + /* add to resolve list */ + p->link = MAWK->resolve_list; + MAWK->resolve_list = p; + } + else if (arg_list && arg_cnt_ok(MAWK, callee, arg_list, line_no)) { + /* usually arg_list disappears here and all is well + otherwise add to resolve list */ + + if ((arg_list = call_arg_check(MAWK, callee, arg_list, mawk_code_base, line_no))) { + p = MAWK_ZMALLOC(MAWK, FCALL_REC); + p->callee = callee; + p->call_scope = call_scope; + p->move_level = move_level; + p->call = call; + p->arg_list = arg_list; + p->arg_cnt_checked = 1; + p->line_no = line_no; + /* add to resolve list */ + p->link = MAWK->resolve_list; + MAWK->resolve_list = p; + } + } +} + + +/* code_pop() has just moved some code. If this code contains + a function call, it might need to be relocated on the + resolve list too. This function does it. +*/ +void mawk_relocate_resolve_list(mawk_state_t *MAWK, int scope, int move_level, FBLOCK *fbp, int orig_offset, unsigned len, int delta) +{ + /* delta -> relocation distance */ + FCALL_REC *p = MAWK->resolve_list; + + while (p) { + if (scope == p->call_scope && move_level == p->move_level && (scope == SCOPE_FUNCT ? 
fbp == p->call : 1)) { + relocate_arglist(p->arg_list, orig_offset, len, delta); + } + p = p->link; + } +} + +static void relocate_arglist(CA_REC *arg_list, int offset, unsigned len, int delta) +{ + register CA_REC *p; + + if (!arg_list) + return; + + p = arg_list; + /* all nodes must be relocated or none, so mawk_test the + first one */ + + /* Note: call_offset is always set even for args that don't need to + be patched so that this check works. */ + if (p->call_offset < offset || p->call_offset >= offset + len) + return; + + /* relocate the whole list */ + do { + p->call_offset += delta; + p = p->link; + } + while (p); +} + +/* example where typing cannot progress + +{ f(z) } + +function f(x) { print NR } + +# this is legal, does something useful, but absurdly written +# We have to design so this works +*/ diff --git a/src/libmawk/field.h b/src/libmawk/field.h new file mode 100644 index 0000000..daad98d --- /dev/null +++ b/src/libmawk/field.h @@ -0,0 +1,68 @@ + +/******************************************** +field.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +#ifndef FIELD_H +#define FIELD_H 1 + +void mawk_set_field0(mawk_state_t *, char *, unsigned); +void mawk_split_field0(mawk_state_t *); +int mawk_space_split(mawk_state_t *, char *, unsigned); +int mawk_re_split(mawk_state_t *, char *, PTR); +int mawk_null_split(mawk_state_t *, char *); +void mawk_field_assign(mawk_state_t *, mawk_cell_t *, mawk_cell_t *); +char *mawk_is_string_split(PTR, unsigned *); +void mawk_bifunct_target_assign(mawk_state_t *, mawk_cell_t *, mawk_cell_t *); +mawk_cell_t *mawk_slow_field_ptr(mawk_state_t *, int); +int mawk_field_addr_to_index(mawk_state_t *, mawk_cell_t *); +void set_binmode(int); +void mawk_load_pfield(mawk_state_t *, char *, mawk_cell_t *); + +#ifdef MAWK_MEM_PEDANTIC +void mawk_field_uninit(mawk_state_t * MAWK); +#endif + +/* some compilers choke on (NF-field) in a case statement + even though it's constant so ... +*/ +#define MAWK_NF_field (MAX_SPLIT+1) +#define MAWK_RS_field (MAX_SPLIT+2) +#define MAWK_FS_field (MAX_SPLIT+3) +#define MAWK_CONVFMT_field (MAX_SPLIT+4) +#define MAWK_OFMT_field (MAX_SPLIT+5) + +/* index to mawk_cell_t * for a field */ +#define field_ptr(i) ((i)<=MAX_SPLIT ? 
MAWK->field+(i):mawk_slow_field_ptr(MAWK, i)) + +/* the pseudo fields, assignment has side effects */ +#define MAWK_NF (MAWK->field+MAX_SPLIT+1) /* must be first */ +#define MAWK_RS (MAWK->field+MAX_SPLIT+2) +#define MAWK_FS (MAWK->field+MAX_SPLIT+3) +#define MAWK_CONVFMT (MAWK->field+MAX_SPLIT+4) +#define MAWK_OFMT (MAWK->field+MAX_SPLIT+5) /* must be last */ + +#define LAST_PFIELD MAWK_OFMT + +/* a shadow type for RS and FS */ +#define SEP_SPACE 0 +#define SEP_CHAR 1 +#define SEP_STR 2 +#define SEP_RE 3 +#define SEP_MLR 4 + +/* types for splitting mawk_overflow */ + + +#endif /* FIELD_H */ diff --git a/src/libmawk/field_common.c b/src/libmawk/field_common.c new file mode 100644 index 0000000..393bda2 --- /dev/null +++ b/src/libmawk/field_common.c @@ -0,0 +1,116 @@ +/******************************************** +field_common.c - $ field operation required in both compile and execute time + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/
+#include "mawk.h"
+#include "field.h"
+#include "init.h"
+#include "memory.h"
+#include "scan.h"
+#include "bi_vars.h"
+#include "repl.h"
+#include "regexp.h"
+/* Enter pseudo field `name` into the symbol table: the entry returned by
+   mawk_insert() is typed ST_FIELD and pointed at cell cp. */
+void mawk_load_pfield(mawk_state_t *MAWK, char *name, mawk_cell_t *cp)
+{
+	SYMTAB *stp;
+
+	stp = mawk_insert(MAWK, name);
+	stp->type = ST_FIELD;
+	stp->stval.cp = cp;
+}
+
+/* Map the address of a field cell back to its field index.
+   MAWK->fbank is scanned for the bank whose FBANK_SZ cells contain cp; the
+   index is the bank number shifted by FB_SHIFT plus the offset in the bank.
+   The scan has no end test, so cp must point into one of the banks. */
+int mawk_field_addr_to_index(mawk_state_t *MAWK, mawk_cell_t *cp)
+{
+	mawk_cell_t **p = MAWK->fbank;
+
+	while (cp < *p || cp >= *p + FBANK_SZ)
+		p++;
+
+	return ((p - MAWK->fbank) << FB_SHIFT) + (cp - *p);
+}
+
+/*------- more than 1 fbank needed ------------*/
+
+/*
+  compute the address of a field with index
+  > MAX_SPLIT
+
+  When i is above MAWK->max_field, zero-filled banks of FBANK_SZ cells are
+  appended at the first empty fbank slot (searched from index 1) until
+  max_field covers i. An index above MAX_FIELD is reported through
+  mawk_rt_overflow() before any allocation.
+*/
+mawk_cell_t *mawk_slow_field_ptr(mawk_state_t *MAWK, register int i)
+{
+
+	if (i > MAWK->max_field) {
+		int j;
+
+		if (i > MAX_FIELD)
+			mawk_rt_overflow(MAWK, "maximum number of fields", MAX_FIELD);
+
+		j = 1;
+		while (MAWK->fbank[j])
+			j++;
+
+		do {
+			MAWK->fbank[j] = (mawk_cell_t *) mawk_zmalloc(MAWK, sizeof(mawk_cell_t) * FBANK_SZ);
+			memset(MAWK->fbank[j], 0, sizeof(mawk_cell_t) * FBANK_SZ);
+			j++;
+			MAWK->max_field += FBANK_SZ;
+		}
+		while (i > MAWK->max_field);
+	}
+
+	/* high bits of i select the bank, low bits the cell inside it */
+	return &MAWK->fbank[i >> FB_SHIFT][i & (FBANK_SZ - 1)];
+}
+
+
+/* initialize $0 and the pseudo fields
+   $0 starts as the shared null string. NF, RS, FS, OFMT and CONVFMT are
+   registered with mawk_load_pfield() and set to 0, "\n", " ", "%.6g" and
+   "%.6g"; CONVFMT reuses OFMT's string object, hence the extra ref_cnt. */
+void mawk_field_init(mawk_state_t * MAWK)
+{
+	MAWK->field[0].type = C_STRING;
+	MAWK->field[0].ptr = (PTR) & MAWK->null_str;
+	MAWK->null_str.ref_cnt++;
+
+	mawk_load_pfield(MAWK, "NF", MAWK_NF);
+	MAWK_NF->type = C_NUM;
+	MAWK_NF->d.dval = MAWK_NUM_ZERO;
+
+	mawk_load_pfield(MAWK, "RS", MAWK_RS);
+	MAWK_RS->type = C_STRING;
+	MAWK_RS->ptr = (PTR) mawk_new_STRING(MAWK, "\n");
+	/* rs_shadow already set */
+
+	mawk_load_pfield(MAWK, "FS", MAWK_FS);
+	MAWK_FS->type = C_STRING;
+	MAWK_FS->ptr = (PTR) mawk_new_STRING(MAWK, " ");
+	/* fs_shadow is already set */
+
+	mawk_load_pfield(MAWK, "OFMT", MAWK_OFMT);
+	MAWK_OFMT->type = C_STRING;
+	MAWK_OFMT->ptr = (PTR) mawk_new_STRING(MAWK, "%.6g");
+
+	mawk_load_pfield(MAWK, "CONVFMT", MAWK_CONVFMT);
+	MAWK_CONVFMT->type = C_STRING;
+	MAWK_CONVFMT->ptr = MAWK_OFMT->ptr;
+	string(MAWK_OFMT)->ref_cnt++;
+}
+/* Built only with MAWK_MEM_PEDANTIC: mawk_delete() each of the pseudo field
+   names that mawk_field_init() registered. */
+#ifdef MAWK_MEM_PEDANTIC
+void mawk_field_uninit(mawk_state_t * MAWK)
+{
+	mawk_delete(MAWK, "NF", 1);
+	mawk_delete(MAWK, "RS", 1);
+	mawk_delete(MAWK, "FS", 1);
+	mawk_delete(MAWK, "OFMT", 1);
+	mawk_delete(MAWK, "CONVFMT", 1);
+}
+#endif
diff --git a/src/libmawk/field_exec.c b/src/libmawk/field_exec.c
new file mode 100644
index 0000000..bb4f736
--- /dev/null
+++ b/src/libmawk/field_exec.c
@@ -0,0 +1,456 @@
+
+/********************************************
+field.c
+
+libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
+based on mawk code coming with the below copyright:
+
+copyright 1991, Michael D. Brennan
+
+This is a source file for mawk, an implementation of
+the AWK programming language.
+
+Mawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/ +#include +#include "mawk.h" +#include "field.h" +#include "init.h" +#include "memory.h" +#include "scan.h" +#include "bi_vars.h" +#include "repl.h" +#include "regexp.h" +#include "cell.h" + +static void build_field0(mawk_state_t *); +static void set_rs_shadow(mawk_state_t *); +static void load_field_ov(mawk_state_t *); + +static void set_rs_shadow(mawk_state_t * MAWK) +{ + mawk_cell_t c; + mawk_string_t *sval; + char *s; + unsigned len; + + if (MAWK->posix_space_flag && MAWK->mawk_state == EXECUTION) + MAWK->scan_code['\n'] = SC_UNEXPECTED; + + if (MAWK->rs_shadow.type == SEP_STR) { + free_STRING((mawk_string_t *) MAWK->rs_shadow.ptr); + } + + mawk_cellcpy(MAWK, &c, MAWK_RS); + mawk_cast_for_split(MAWK, &c); + switch (c.type) { + case C_RE: + if ((s = mawk_is_string_split(c.ptr, &len))) { + if (len == 1) { + MAWK->rs_shadow.type = SEP_CHAR; + MAWK->rs_shadow.c = s[0]; + } + else { + MAWK->rs_shadow.type = SEP_STR; + MAWK->rs_shadow.ptr = (PTR) mawk_new_STRING(MAWK, s); + } + } + else { + MAWK->rs_shadow.type = SEP_RE; + MAWK->rs_shadow.ptr = c.ptr; + } + break; + + case C_SPACE: + MAWK->rs_shadow.type = SEP_CHAR; + MAWK->rs_shadow.c = ' '; + break; + + case C_SNULL: /* RS becomes one or more blank lines */ + if (MAWK->mawk_state == EXECUTION) + MAWK->scan_code['\n'] = SC_SPACE; + MAWK->rs_shadow.type = SEP_MLR; + sval = mawk_new_STRING(MAWK, "\n\n+"); + MAWK->rs_shadow.ptr = mawk_re_compile(MAWK, sval); + free_STRING(sval); + break; + + default: + mawk_bozo(MAWK, "bad cell in set_rs_shadow"); + } +} + + +void mawk_set_field0(mawk_state_t *MAWK, char *s, unsigned len) +{ + mawk_cell_destroy(MAWK, &MAWK->field[0]); + MAWK->nf = -1; + + if (len) { + MAWK->field[0].type = C_MBSTRN; + MAWK->field[0].ptr = (PTR) mawk_new_STRING0(MAWK, len); + memcpy(string(&MAWK->field[0])->str, s, len); + } + else { + MAWK->field[0].type = C_STRING; + MAWK->field[0].ptr = (PTR) & MAWK->null_str; + MAWK->null_str.ref_cnt++; + } +} + + + +/* 
split field[0] into $1, $2 ... and set NF */ + +void mawk_split_field0(mawk_state_t * MAWK) +{ + register mawk_cell_t *cp; + register int cnt; + mawk_cell_t c; /* copy field[0] here if not string */ + + + if (MAWK->field[0].type < C_STRING) { + mawk_cellcpy(MAWK, &c, MAWK->field + 0); + mawk_cast1_to_str(MAWK, &c); + cp = &c; + } + else + cp = &MAWK->field[0]; + + if (string(cp)->len == 0) + MAWK->nf = 0; + else { + switch (MAWK->fs_shadow.type) { + case C_SNULL: /* FS == "" */ + MAWK->nf = mawk_null_split(MAWK, string(cp)->str); + break; + + case C_SPACE: + MAWK->nf = mawk_space_split(MAWK, string(cp)->str, string(cp)->len); + break; + + default: + MAWK->nf = mawk_re_split(MAWK, string(cp)->str, MAWK->fs_shadow.ptr); + break; + } + + } + + mawk_cell_destroy(MAWK, MAWK_NF); + MAWK_NF->type = C_NUM; + MAWK_NF->d.dval = (mawk_num_t) MAWK->nf; + + if (MAWK->nf > MAX_SPLIT) { + cnt = MAX_SPLIT; + load_field_ov(MAWK); + } + else + cnt = MAWK->nf; + + while (cnt > 0) { + mawk_cell_destroy(MAWK, MAWK->field + cnt); + MAWK->field[cnt].ptr = (PTR) split_buff[cnt - 1]; + MAWK->field[cnt--].type = C_MBSTRN; + } + + if (cp == &c) { + free_STRING(string(cp)); + } +} + +/* construct field[0] from the other fields */ + +static void build_field0(mawk_state_t * MAWK) +{ +#ifdef DEBUG + if (MAWK->nf < 0) + mawk_bozo(MAWK, "nf <0 in build_field0"); +#endif + + mawk_cell_destroy(MAWK, MAWK->field + 0); + + if (MAWK->nf == 0) { + MAWK->field[0].type = C_STRING; + MAWK->field[0].ptr = (PTR) & MAWK->null_str; + MAWK->null_str.ref_cnt++; + } + else if (MAWK->nf == 1) { + mawk_cellcpy(MAWK, MAWK->field, MAWK->field + 1); + } + else { + mawk_cell_t c; + mawk_string_t *ofs, *tail; + unsigned len; + register mawk_cell_t *cp; + register char *p, *q; + int cnt; + mawk_cell_t **fbp, *cp_limit; + + + mawk_cellcpy(MAWK, &c, OFS); + mawk_cast1_to_str(MAWK, &c); + ofs = (mawk_string_t *) c.ptr; + mawk_cellcpy(MAWK, &c, field_ptr(MAWK->nf)); + mawk_cast1_to_str(MAWK, &c); + tail = (mawk_string_t *) 
c.ptr; + cnt = MAWK->nf - 1; + + len = cnt * ofs->len + tail->len; + + fbp = MAWK->fbank; + cp_limit = MAWK->field + FBANK_SZ; + cp = MAWK->field + 1; + + while (cnt-- > 0) { + if (cp->type < C_STRING) { /* use the string field temporarily */ + if (cp->type == C_NOINIT) { + cp->ptr = (PTR) & MAWK->null_str; + MAWK->null_str.ref_cnt++; + } + else { /* its a number */ + + Int ival; + char xbuff[260]; + + ival = mawk_d_to_I(cp->d.dval); + if (ival == cp->d.dval) + sprintf(xbuff, INT_FMT, ival); + else + sprintf(xbuff, string(MAWK_CONVFMT)->str, cp->d.dval); + + cp->ptr = (PTR) mawk_new_STRING(MAWK, xbuff); + } + } + + len += string(cp)->len; + + if (++cp == cp_limit) { + cp = *++fbp; + cp_limit = cp + FBANK_SZ; + } + + } + + MAWK->field[0].type = C_STRING; + MAWK->field[0].ptr = (PTR) mawk_new_STRING0(MAWK, len); + + p = string(MAWK->field)->str; + + /* walk it again , putting things together */ + cnt = MAWK->nf - 1; + fbp = MAWK->fbank; + cp = MAWK->field + 1; + cp_limit = MAWK->field + FBANK_SZ; + while (cnt-- > 0) { + memcpy(p, string(cp)->str, string(cp)->len); + p += string(cp)->len; + /* if not really string, free temp use of ptr */ + if (cp->type < C_STRING) { + free_STRING(string(cp)); + } + if (++cp == cp_limit) { + cp = *++fbp; + cp_limit = cp + FBANK_SZ; + } + /* add the separator */ + q = ofs->str; + while (*q) + *p++ = *q++; + } + /* tack tail on the end */ + memcpy(p, tail->str, tail->len); + + /* cleanup */ + free_STRING(tail); + free_STRING(ofs); + } +} + +/* We are assigning to a mawk_cell_t and we aren't sure if its + a field */ + +void mawk_bifunct_target_assign(mawk_state_t *MAWK, register mawk_cell_t *target_, mawk_cell_t *source) +{ + mawk_cell_t *target = target_->ptr; /* target is assumed to be the varref */ + + if (target_->type == C_ARR_REF_BT) { + /* reference to an array member: ->ptr is the array, ->d.idx_str is the zmalloc'd string cell */ + mawk_array_set(MAWK, (mawk_array_t)target_->ptr, target_->d.idx_cell, source); + 
mawk_cell_destroy(MAWK, target_->d.idx_cell); + mawk_zfree(MAWK, target_->d.idx_cell, sizeof(mawk_cell_t)); + target_->type = C_NOINIT; /* don't need to destroy this: it's a special arr ref */ + return ; + } + + /* original code dealing with a normal varref */ + if (target >= MAWK->field && target <= LAST_PFIELD) + mawk_field_assign(MAWK, target, source); + else { + mawk_cell_t **p = MAWK->fbank + 1; + + while (*p) { + if (target >= *p && target < *p + FBANK_SZ) { + mawk_field_assign(MAWK, target, source); + return; + } + p++; + } + /* its not a field */ + mawk_cell_destroy(MAWK, target); + mawk_cellcpy(MAWK, target, source); + } +} + +/* + $0 split into more than MAX_SPLIT fields, + $(MAX_FIELD+1) ... are on the split_ov_list. + Copy into fields which start at fbank[1] +*/ + +static void load_field_ov(mawk_state_t * MAWK) +{ + register SPLIT_OV *p; /* walks split_ov_list */ + register mawk_cell_t *cp; /* target of copy */ + int j; /* current fbank[] */ + mawk_cell_t *cp_limit; /* change fbank[] */ + SPLIT_OV *q; /* trails p */ + + /* make sure the fields are allocated */ + mawk_slow_field_ptr(MAWK, MAWK->nf); + + p = MAWK->split_ov_list; + MAWK->split_ov_list = (SPLIT_OV *) 0; + j = 1; + cp = MAWK->fbank[j]; + cp_limit = cp + FBANK_SZ; + while (p) { + mawk_cell_destroy(MAWK, cp); + cp->type = C_MBSTRN; + cp->ptr = (PTR) p->sval; + + if (++cp == cp_limit) { + cp = MAWK->fbank[++j]; + cp_limit = cp + FBANK_SZ; + } + + q = p; + p = p->link; + MAWK_ZFREE(MAWK, q); + } +} + +/* + assign mawk_cell_t *cp to field or pseudo field + and take care of all side effects +*/ +void mawk_field_assign(mawk_state_t *MAWK, register mawk_cell_t *fp, mawk_cell_t *cp) +{ + mawk_cell_t c; + int i, j; + + /* the most common case first */ + if (fp == MAWK->field) { + mawk_cell_destroy(MAWK, MAWK->field); + mawk_cellcpy(MAWK, fp, cp); + MAWK->nf = -1; + return; + } + + /* its not important to do any of this fast */ + + if (MAWK->nf < 0) + mawk_split_field0(MAWK); + + switch (i = (fp - 
MAWK->field)) { + + case MAWK_NF_field: + + mawk_cell_destroy(MAWK, MAWK_NF); + mawk_cellcpy(MAWK, &c, cp); + mawk_cellcpy(MAWK, MAWK_NF, &c); + if (c.type != C_NUM) + mawk_cast1_to_num(MAWK, &c); + + if ((j = d_to_i(c.d.dval)) < 0) + mawk_rt_error(MAWK, "negative value assigned to NF"); + + if (j > MAWK->nf) + for (i = MAWK->nf + 1; i <= j; i++) { + cp = field_ptr(i); + mawk_cell_destroy(MAWK, cp); + cp->type = C_STRING; + cp->ptr = (PTR) & MAWK->null_str; + MAWK->null_str.ref_cnt++; + } + + MAWK->nf = j; + build_field0(MAWK); + break; + + case MAWK_RS_field: + mawk_cell_destroy(MAWK, MAWK_RS); + mawk_cellcpy(MAWK, MAWK_RS, cp); + set_rs_shadow(MAWK); + break; + + case MAWK_FS_field: + mawk_cell_destroy(MAWK, MAWK_FS); + mawk_cellcpy(MAWK, MAWK_FS, cp); + mawk_cellcpy(MAWK, &MAWK->fs_shadow, MAWK_FS); + mawk_cast_for_split(MAWK, &MAWK->fs_shadow); + break; + + case MAWK_OFMT_field: + case MAWK_CONVFMT_field: + /* If the user does something stupid with OFMT or CONVFMT, + we could crash. + We'll make an attempt to protect ourselves here. This is + why OFMT and CONVFMT are pseudo fields. + + The ptrs of OFMT and CONVFMT always have a valid mawk_string_t, + even if assigned a NUM or NOINIT + */ + + free_STRING(string(fp)); + mawk_cellcpy(MAWK, fp, cp); + if (fp->type < C_STRING) /* !! */ + fp->ptr = (PTR) mawk_new_STRING(MAWK, "%.6g"); + else if (fp == MAWK_CONVFMT) { + /* It's a string, but if it's really goofy and CONVFMT, + it could still mawk_damage us. Test it . + */ + char xbuff[512]; + + xbuff[256] = 0; + sprintf(xbuff, string(fp)->str, 3.1459); + if (xbuff[256]) + mawk_rt_error(MAWK, "CONVFMT assigned unusable value"); + } + break; + + default: /* $1 or $2 or ... 
*/ + mawk_cell_destroy(MAWK, fp); + mawk_cellcpy(MAWK, fp, cp); + + if (i < 0 || i > MAX_SPLIT) + i = mawk_field_addr_to_index(MAWK, fp); + + if (i > MAWK->nf) { + for (j = MAWK->nf + 1; j < i; j++) { + cp = field_ptr(j); + mawk_cell_destroy(MAWK, cp); + cp->type = C_STRING; + cp->ptr = (PTR) & MAWK->null_str; + MAWK->null_str.ref_cnt++; + } + MAWK->nf = i; + mawk_cell_destroy(MAWK, MAWK_NF); + MAWK_NF->type = C_NUM; + MAWK_NF->d.dval = (mawk_num_t) i; + } + + build_field0(MAWK); + + } +} diff --git a/src/libmawk/files.c b/src/libmawk/files.c new file mode 100644 index 0000000..f0b3127 --- /dev/null +++ b/src/libmawk/files.c @@ -0,0 +1,327 @@ +/******************************************** +files.c + +libmawk changes (C) 2009-2013, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991-94. Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/
+#include "conf.h"
+#include
+#include
+#include
+#include "mawk.h"
+#include "files.h"
+#include "memory.h"
+#include "fin.h"
+#include "vio.h"
+
+#ifdef PATH_MAX
+#define PATH_BUFF_SIZE PATH_MAX
+#else
+#define PATH_BUFF_SIZE 1024
+#endif
+
+/* Find a file on file_list, optionally opening it.
+
+   sval   - name as given by the script; passed through
+            MAWK->file_name_rewrite() when that hook is set, and "-" is
+            mapped to "/dev/stdin"
+   type   - F_TRUNC, F_APPEND, F_IN, PIPE_OUT or PIPE_IN
+   create - when zero, a name that is not on the list is an error
+
+   The list is searched by (rewritten) name and type, with F_APPEND and
+   F_TRUNC treated alike. A node that is found or created is moved to the
+   front of the list and returned.
+
+   Returns NULL when:
+    - the rewrite hook returns NULL, or an output file/pipe cannot be
+      opened: an error is printed and mawk_exitval(MAWK, 2, NULL) is called
+    - the name is unknown and create is zero: same reporting
+    - an input file/pipe cannot be opened: silently
+
+   NOTE(review): the PIPE_IN branch does not check the result of
+   mawk_fin_alloc(), unlike the F_IN branch - confirm whether it can fail.
+*/
+FILE_NODE *mawk_file_find(mawk_state_t *MAWK, mawk_string_t *sval, int type, int create)
+{
+	register FILE_NODE *p = MAWK->file_list;
+	FILE_NODE *q = (FILE_NODE *) 0;
+	FILE_NODE *fresh = (FILE_NODE *) 0; /* node allocated here and not linked yet */
+	const char *name_orig = sval->str;
+	const char *name;
+	char name_buff[PATH_BUFF_SIZE];
+	mawk_vio_t *vf;
+
+	if (MAWK->file_name_rewrite != NULL)
+		name = MAWK->file_name_rewrite(name_orig, name_buff, sizeof(name_buff), type);
+	else
+		name = name_orig;
+
+	if (name == NULL)
+		goto out_failure;
+
+	/* rewrite - to be /dev/stdin so we never have 2 names for the same thing */
+	if ((name[0] == '-') && (name[1] == '\0'))
+		name = "/dev/stdin";
+
+	while (1) {
+		if (!p) {
+			if (!create)
+				goto nocreate_failure;
+			/* open a new one */
+			p = MAWK_ZMALLOC(MAWK, FILE_NODE);
+			fresh = p; /* released at out_failure if the open fails */
+
+			p->vf = NULL;
+			p->fin = NULL;
+
+			switch (p->type = type) {
+			case F_TRUNC:
+				p->vf = mawk_vio_open(MAWK, name, MAWK_VIO_O_TRUNC);
+				if (p->vf == NULL)
+					goto out_failure;
+				p->vf->refco = 1;
+				break;
+
+			case F_APPEND:
+				p->vf = mawk_vio_open(MAWK, name, MAWK_VIO_O_APPEND);
+				if (p->vf == NULL)
+					goto out_failure;
+				p->vf->refco = 1;
+				break;
+
+			case F_IN:
+				if ((p->fin = (PTR) mawk_fin_alloc(MAWK, p)) == NULL) {
+					mawk_zfree(MAWK, p, sizeof(FILE_NODE));
+					return (PTR) 0;
+				}
+				p->vf = mawk_vio_open(MAWK, name, MAWK_VIO_I);
+				if (p->vf == NULL) {
+					mawk_fin_free(MAWK, p->fin);
+					mawk_zfree(MAWK, p, sizeof(FILE_NODE));
+					return (PTR) 0;
+				}
+				p->vf->refco = 1;
+				break;
+
+			case PIPE_OUT:
+			case PIPE_IN:
+				vf = mawk_vio_open_pipe(MAWK, name, type);
+				if (vf == NULL) {
+					if (type == PIPE_OUT) {
+						goto out_failure;
+					}
+					else {
+						mawk_zfree(MAWK, p, sizeof(FILE_NODE));
+						return (PTR) 0;
+					}
+				}
+				if (type == PIPE_IN)
+					p->fin = mawk_fin_alloc(MAWK, p);
+				p->vf = vf;
+				p->vf->refco = 1;
+				break;
+
+#ifdef DEBUG
+			default:
+				mawk_bozo(MAWK, "bad file type");
+#endif
+			}
+			/* successful open */
+			if (name != name_orig) {
+				/* name has been rewritten, have to alloc a new string;
+				   the node owns that string, so sval gets no extra reference */
+				p->name = mawk_new_STRING(MAWK, name);
+			}
+			else {
+				/* the node shares the caller's string */
+				p->name = sval;
+				sval->ref_cnt++;
+			}
+			break; /* while loop */
+		}
+
+		/* search is by name and type */
+		if ((strcmp(name, p->name->str) == 0 && ((p->type == type) ||
+			/* no distinction between F_APPEND and F_TRUNC here */
+			(p->type >= F_APPEND && type >= F_APPEND))))
+		{
+			/* found */
+			if (!q) /*at front of list */
+				return p;
+			/* delete from list for move to front */
+			q->link = p->link;
+			break; /* while loop */
+		}
+
+		q = p;
+		p = p->link;
+	} /* end while loop */
+
+	/* put p at the front of the list */
+	p->link = MAWK->file_list;
+	return (MAWK->file_list = p);
+
+out_failure:;
+	/* name is NULL when the rewrite hook rejected it: report the original
+	   name instead of handing NULL to %s. The message is printed before the
+	   node is released so errno still describes the failed open. */
+	mawk_errmsg(MAWK, errno, "cannot open \"%s\" for output", name != NULL ? name : name_orig);
+	if (fresh != NULL)
+		mawk_zfree(MAWK, fresh, sizeof(FILE_NODE));
+	mawk_exitval(MAWK, 2, NULL);
+	return NULL;
+
+nocreate_failure:;
+	mawk_errmsg(MAWK, errno, "cannot open \"%s\" - it does not exist in current context and should not be created now", name);
+	mawk_exitval(MAWK, 2, NULL);
+	return NULL;
+}
+
+/* Same as mawk_file_find() for a plain C string: the name is wrapped in a
+   temporary mawk_string_t whose local reference is dropped before return. */
+FILE_NODE *mawk_file_find_(mawk_state_t *MAWK, const char *name, int type, int create)
+{
+	FILE_NODE *f;
+	mawk_string_t *sval = mawk_new_STRING(MAWK, name);
+	f = mawk_file_find(MAWK, sval, type, create);
+	free_STRING(sval);
+	return f;
+}
+
+/* Release what node p holds: its fin buffer, one reference on its vf (the
+   vf is closed when refco reaches zero) and its name string. The node
+   itself is neither unlinked nor freed here. Always returns 0. */
+static int mawk_file_close_lowlev(mawk_state_t *MAWK, FILE_NODE *p)
+{
+	if (p->fin != NULL) {
+		mawk_fin_free(MAWK, p->fin);
+		p->fin = NULL;
+	}
+	if (p->vf != NULL) {
+		p->vf->refco--;
+		if (p->vf->refco == 0)
+			mawk_vio_close(MAWK, p->vf);
+		p->vf = NULL;
+	}
+	if (p->name != NULL) {
+		free_STRING(p->name);
+	}
+	return 0;
+}
+
+/* Close a file and delete its node from the file_list.
+   Walk the whole list, in case a name has two nodes,
+   e.g. < "/dev/tty" and > "/dev/tty"
+
+   Returns -1 when no node carries that name, else the result of the last
+   mawk_file_close_lowlev() call.
+   NOTE(review): sval is compared as given, while mawk_file_find() stores
+   the rewritten name ("/dev/stdin" for "-") - confirm callers pass the
+   stored spelling.
+*/
+int mawk_file_close(mawk_state_t *MAWK, mawk_string_t *sval)
+{
+	FILE_NODE dummy;
+	register FILE_NODE *p;
+	FILE_NODE *q = &dummy; /* trails p */
+	FILE_NODE *hold;
+	char *name = sval->str;
+	int retval = -1;
+
+	dummy.link = p = MAWK->file_list;
+	while (p) {
+		if (strcmp(name, p->name->str) == 0) {
+			/* found */
+			retval = mawk_file_close_lowlev(MAWK, p);
+			hold = p;
+			q->link = p = p->link;
+			MAWK_ZFREE(MAWK, hold);
+		}
+		else {
+			q = p;
+			p = p->link;
+		}
+	}
+
+	MAWK->file_list = dummy.link;
+	return retval;
+}
+
+/* Close file node f and delete it from the file_list.
+   Returns -1 when f is not on the list. */
+int mawk_file_close_(mawk_state_t *MAWK, FILE_NODE *f)
+{
+	FILE_NODE dummy;
+	register FILE_NODE *p;
+	FILE_NODE *q = &dummy; /* trails p */
+	FILE_NODE *hold;
+	int retval = -1;
+
+	dummy.link = p = MAWK->file_list;
+	while (p) {
+		if (p == f) {
+			/* found */
+			retval = mawk_file_close_lowlev(MAWK, p);
+			hold = p;
+			q->link = p = p->link;
+			MAWK_ZFREE(MAWK, hold);
+		}
+		else {
+			q = p;
+			p = p->link;
+		}
+	}
+
+	MAWK->file_list = dummy.link;
+	return retval;
+}
+
+
+/*
+find every output node with name == sval and flush it
+An empty name flushes all output and returns 0; otherwise the result is 0
+when at least one node matched and -1 when none did.
+*/
+int mawk_file_flush(mawk_state_t *MAWK, mawk_string_t *sval)
+{
+	int ret = -1;
+	register FILE_NODE *p = MAWK->file_list;
+	unsigned len = sval->len;
+	char *str = sval->str;
+
+	if (len == 0) {
+		/* for consistency with gawk */
+		mawk_flush_all_output(MAWK);
+		return 0;
+	}
+
+	while (p) {
+		if (IS_OUTPUT(p->type) && len == p->name->len && strcmp(str, p->name->str) == 0) {
+			ret = 0;
+			mawk_vio_flush(MAWK, p->vf);
+			/* it's possible for a command and a file to have the same
+			   name -- so keep looking */
+		}
+		p = p->link;
+	}
+	return ret;
+}
+
+/* Flush every node on file_list whose type satisfies IS_OUTPUT(). */
+void mawk_flush_all_output(mawk_state_t * MAWK)
+{
+	FILE_NODE *p;
+
+	for (p = MAWK->file_list; p; p = p->link)
+		if (IS_OUTPUT(p->type))
+			mawk_vio_flush(MAWK, p->vf);
+}
+
+FILE_NODE *mawk_file_register_nofin(mawk_state_t *MAWK, const char *name, int type, mawk_vio_t *vf)
+{
+ FILE_NODE *p; + + p = MAWK_ZMALLOC(MAWK, FILE_NODE); + p->link = MAWK->file_list; + p->type = type; + p->name = mawk_new_STRING(MAWK, name); + p->vf = vf; + p->fin = NULL; + if (vf != NULL) + vf->refco++; + MAWK->file_list = p; + + /* update hardwireds */ + if (strcmp(name, "/dev/stdin") == 0) + MAWK->fnode_stdin = p; + else if (strcmp(name, "/dev/stdout") == 0) + MAWK->fnode_stdout = p; + else if (strcmp(name, "/dev/stderr") == 0) + MAWK->fnode_stderr = p; + + return p; +} + +FILE_NODE *mawk_file_register(mawk_state_t *MAWK, const char *name, int type, mawk_vio_t *vf) +{ + FILE_NODE *fn; + + fn = mawk_file_register_nofin(MAWK, name, type, vf); + if ((type == F_IN) || (type == PIPE_IN)) + fn->fin = mawk_fin_alloc(MAWK, fn); + return fn; +} + +void mawk_file_uninit(mawk_state_t * MAWK) +{ + FILE_NODE *p, *next; + + for (p = MAWK->file_list; p; p = next) { + next = p->link; + mawk_file_close_lowlev(MAWK, p); + MAWK_ZFREE(MAWK, p); + } +} diff --git a/src/libmawk/files.h b/src/libmawk/files.h new file mode 100644 index 0000000..b235dc6 --- /dev/null +++ b/src/libmawk/files.h @@ -0,0 +1,56 @@ + +/******************************************** +files.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#ifndef FILES_H +#define FILES_H + +/* IO redirection types */ +#define F_IN (-5) +#define PIPE_IN (-4) +#define PIPE_OUT (-3) +#define F_APPEND (-2) +#define F_TRUNC (-1) +#define IS_OUTPUT(type) ((type)>=PIPE_OUT) + +/* look up a file:type by name; if it does not exist, create it (if create is not 0). + NOTE: F_APPEND and F_TRUNC are the same in file:type. 
*/ +FILE_NODE *mawk_file_find(mawk_state_t *MAWK, mawk_string_t *name, int type, int create); +FILE_NODE *mawk_file_find_(mawk_state_t *MAWK, const char *name, int type, int create); + + +int mawk_file_close(mawk_state_t *, mawk_string_t *); +int mawk_file_close_(mawk_state_t *MAWK, FILE_NODE *f); +int mawk_file_flush(mawk_state_t *, mawk_string_t *); +void mawk_flush_all_output(mawk_state_t * MAWK); + +/* register a mawk_vio_t * as an open file - no checks are performed about + the name, which should be unique; the caller should use mawk_file_find + to make sure the file does not exist + nofin never sets up the file node's fin buffer, while the plain version + does this for input files/pipes. + */ +FILE_NODE *mawk_file_register_nofin(mawk_state_t *MAWK, const char *name, int type, mawk_vio_t *vf); +FILE_NODE *mawk_file_register(mawk_state_t *MAWK, const char *name, int type, mawk_vio_t *vf); + + +#ifndef MAWK_NO_FORK +int mawk_wait_for(mawk_state_t *, int); +#endif + + +void mawk_file_uninit(mawk_state_t * MAWK); + +#endif diff --git a/src/libmawk/files_children.c b/src/libmawk/files_children.c new file mode 100644 index 0000000..645d047 --- /dev/null +++ b/src/libmawk/files_children.c @@ -0,0 +1,110 @@ +/******************************************** +files_children.c + +libmawk changes (C) 2009-2014, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991-94. Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ +#include "conf.h" +#include +#include +#include +#include +#include +#include "mawk.h" +#include "files.h" +#include "memory.h" +#include "fin.h" + + +static void add_to_child_list(mawk_state_t *, int, int); +static struct child *remove_from_child_list(mawk_state_t *, int); + +/* we need to wait for children at the end of output pipes to + complete so we know any files they have created are complete */ + +static void add_to_child_list(mawk_state_t *MAWK, int pid, int exit_status) +{ + register struct child *p = MAWK_ZMALLOC(MAWK, struct child); + + p->pid = pid; + p->exit_status = exit_status; + p->link = MAWK->child_list; + MAWK->child_list = p; +} + +static struct child *remove_from_child_list(mawk_state_t *MAWK, int pid) +{ + struct child dummy; + register struct child *p; + struct child *q = &dummy; + + dummy.link = p = MAWK->child_list; + while (p) { + if (p->pid == pid) { + q->link = p->link; + break; + } + else { + q = p; + p = p->link; + } + } + + MAWK->child_list = dummy.link; + return p; + /* null return if not in the list */ +} + + +#ifndef MAWK_NO_FORK +/* wait for a specific child to complete and return its + exit status + + If pid is zero, wait for any single child and + put it on the dead children list +*/ +int mawk_wait_for(mawk_state_t *MAWK, int pid) +{ + int exit_status; + struct child *p; + int id; + + if (pid == 0) { + id = wait(&exit_status); + add_to_child_list(MAWK, id, exit_status); + } + /* see if an earlier wait() caught our child */ + else if ((p = remove_from_child_list(MAWK, pid))) { + exit_status = p->exit_status; + MAWK_ZFREE(MAWK, p); + } + else { + /* need to really wait */ + while ((id = wait(&exit_status)) != pid) { + if (id == -1) /* can't happen */ + mawk_bozo(MAWK, "mawk_wait_for"); + else { + /* we got the exit status of another child + put it on the child list and try again */ + add_to_child_list(MAWK, id, exit_status); + } + } + } + + if (exit_status & 0xff) + exit_status = 
128 + (exit_status & 0xff); + else + exit_status = (exit_status & 0xff00) >> 8; + + return exit_status; +} +#endif + diff --git a/src/libmawk/fin.h b/src/libmawk/fin.h new file mode 100644 index 0000000..67b8c57 --- /dev/null +++ b/src/libmawk/fin.h @@ -0,0 +1,53 @@ + +/******************************************** +fin.h + +libmawk changes (C) 2009-2012, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +/* buffered, splitting input (FIN, aka mawk_input_t) */ + +#ifndef FIN_H +#define FIN_H +/* structure to control input files */ + +enum { + MAWK_INPF_MAIN = 1, /* part of main input stream if on */ + MAWK_INPF_EOF = 2, /* reached EOF */ + MAWK_INPF_START = 4, /* used when RS == "" */ + MAWK_INPF_NO_MORE = 8, /* there's no more input */ + MAWK_INPF_DEAD_NO_FREE = 16, /* static dead buffer - do not free */ + MAWK_INPF_NO_MORE_INPUTS = 32 /* we are at the end of the input file list, impossible to have any more main input */ +}; + +struct mawk_fin_s { + FILE_NODE *fn; + char *buf; + char *next; /* start of the next record, within buf; anything before this is already returned in a previous call */ + int used; /* how much bytes are in use in buf - this includes records already used up in the beginning of the buffer */ + int alloced; /* total allocated size of the buf */ + int flags; +}; + + +#define mawk_FIN_nomore (-2) + +mawk_input_t *mawk_fin_alloc(mawk_state_t *MAWK, FILE_NODE *parent); +void mawk_fin_free(mawk_state_t *MAWK, mawk_input_t *fin); + +long mawk_fillbuff(mawk_state_t *, mawk_input_t *, char *, unsigned, int interactive); + +/* execution: */ +void mawk_FINopen_main(mawk_state_t * MAWK); +char *mawk_FINgets(mawk_state_t *MAWK, FILE_NODE *fn, unsigned 
*len_p); + +#endif /* FIN_H */ diff --git a/src/libmawk/fin_common.c b/src/libmawk/fin_common.c new file mode 100644 index 0000000..1be171a --- /dev/null +++ b/src/libmawk/fin_common.c @@ -0,0 +1,121 @@ + +/******************************************** +fin.c + +libmawk changes (C) 2009-2012, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, 1992. Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ +#include "conf.h" +#include +#include "mawk.h" +#include "fin.h" +#include "memory.h" +#include "bi_vars.h" +#include "field.h" +#include "symtype.h" +#include "scan.h" +#include "vio.h" +#include + +#ifndef NO_FCNTL_H +#include +#endif + +/* This file handles input file buffering and (most important) splitting + files into records, mawk_FINgets(). 
+*/ +mawk_input_t *mawk_fin_alloc(mawk_state_t *MAWK, FILE_NODE *parent) +{ + mawk_input_t *fin; + + fin = MAWK_ZMALLOC(MAWK, mawk_input_t); + fin->flags = MAWK_INPF_START; + fin->buf = NULL; + fin->next = NULL; + fin->used = 0; + fin->alloced = 0; + fin->fn = parent; + return fin; +} + + +int mawk_is_cmdline_assign(mawk_state_t *, char *); /* also used by init */ + +/* frees the buffer, but leaves mawk_input_t structure until + the user calls close() */ +static void mawk_fin_free_buff(mawk_state_t *MAWK, register mawk_input_t *fin) +{ + if (fin->buf != NULL) { + mawk_zfree(MAWK, fin->buf, fin->alloced); + fin->buf = NULL; /* marks it semi_closed */ + fin->used = 0; + fin->alloced = 0; + } +} + +/* user called close() on input file */ +void mawk_fin_free(mawk_state_t *MAWK, mawk_input_t *fin) +{ + if (fin->flags & MAWK_INPF_DEAD_NO_FREE) + return; + mawk_fin_free_buff(MAWK, fin); + MAWK_ZFREE(MAWK, fin); +} + +/*-------- + Attempt to read size bytes, retry until eof or no_more. + target is big enough to hold size + 1 chars + on exit the back of the target is zero terminated, unless error + Returns the number of bytes read or an error indication if there were no new bytes + Read exactly size bytes, retrying as needed, unless: + - eof (first eof: returns the bytes read so far; second call: returns 0) + - error (first error: return -1, discard the buffer(!)) + - no_more (first no_more: returns the bytes read so far; second no_more: returns no_more) + *--------------*/ +long mawk_fillbuff(mawk_state_t *MAWK, mawk_input_t *fin, register char *target, unsigned size, int interactive) +{ + register int r; + unsigned entry_size = size; + { + while (size) { + errno = 0; + fin->flags &= ~MAWK_INPF_NO_MORE; + switch (r = mawk_vio_read(MAWK, fin->fn->vf, target, size)) { + case -2: + fin->flags |= MAWK_INPF_NO_MORE; + *target = 0; + if (entry_size - size > 0) + return entry_size - size; + return mawk_FIN_nomore; + case -1: + { + int e; + e = errno; + mawk_set_errno(MAWK, 
strerror(e)); + mawk_errmsg(MAWK, e, "read error"); + mawk_exitval(MAWK, 2, -1); + } + case 0: + goto out; + + default: + target += r; + size -= r; + if (interactive) + goto out; + break; + } + } + } +out: + *target = 0; + return entry_size - size; +} diff --git a/src/libmawk/fin_comp.c b/src/libmawk/fin_comp.c new file mode 100644 index 0000000..2916920 --- /dev/null +++ b/src/libmawk/fin_comp.c @@ -0,0 +1,19 @@ +/******************************************** +fin_comp.c + +libmawk changes (C) 2009-2013, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#include "mawk.h" + +int mawk_is_cmdline_assign(mawk_state_t *MAWK, char *s) +{ + return 0; +} diff --git a/src/libmawk/fin_exec.c b/src/libmawk/fin_exec.c new file mode 100644 index 0000000..770fa39 --- /dev/null +++ b/src/libmawk/fin_exec.c @@ -0,0 +1,364 @@ +/******************************************** +fin_exec.c + +libmawk changes (C) 2009-2013, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, 1992. Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ +#include "conf.h" +#include +#include +#include "mawk.h" +#include "fin.h" +#include "memory.h" +#include "bi_vars.h" +#include "field.h" +#include "symtype.h" +#include "scan.h" +#include "vio.h" +#include "init.h" +#include "vars.h" +#include "cell.h" +#include "files.h" + +#ifndef NO_FCNTL_H +#include +#endif + +/* main_input is a FILE_NODE to the main input stream + == 0 never been opened */ + +static void set_main_to_stdin(mawk_state_t * MAWK) +{ + FILE_NODE *fn; + mawk_cell_destroy(MAWK, FILENAME); + FILENAME->type = C_STRING; + FILENAME->ptr = (PTR) mawk_new_STRING(MAWK, "-"); + mawk_cell_destroy(MAWK, FNR); + FNR->type = C_NUM; + FNR->d.dval = MAWK_NUM_ZERO; + MAWK->rt_fnr = 0; + MAWK->main_input = mawk_file_find_(MAWK, "/dev/stdin", F_IN, 1); +#warning TODO: abstract + MAWK->main_input->fin->flags |= MAWK_INPF_MAIN; +} + +/* get the next command line file open */ +static mawk_input_t *next_main(mawk_state_t *MAWK, int open_flag) +{ + /* open_flag: called by mawk_FINopen_main() if on */ + register mawk_cell_t *cp; + mawk_cell_t argc; /* copy of ARGC */ + mawk_cell_t c_argi; /* cell copy of argi */ + mawk_cell_t argval; /* copy of ARGV[c_argi] */ + + + argval.type = C_NOINIT; + c_argi.type = C_NUM; + + if (MAWK->main_input != NULL) + mawk_file_close_(MAWK, MAWK->main_input); + MAWK->main_input = NULL; + /* FILENAME and FNR don't change unless we really open + a new file */ + + /* make a copy of ARGC to avoid side effect */ + mawk_cellcpy(MAWK, &argc, ARGC); + if (argc.type != C_NUM) + mawk_cast1_to_num(MAWK, &argc); + + while (MAWK->argi < argc.d.dval) { + c_argi.d.dval = MAWK->argi; + MAWK->argi += MAWK_NUM_ONE; + + if ((mawk_array_find(MAWK, MAWK->Argv, &c_argi, &argval, NO_MAWK_CREATE)) == 0) + continue; /* its deleted */ + + /* make a copy so we can mawk_cast w/o side effect */ + cp = &argval; + if (cp->type < C_STRING) + mawk_cast1_to_str(MAWK, cp); + if (string(cp)->len == 0) + continue; + /* file 
argument is "" */ + + /* it might be a command line assignment */ + if (mawk_is_cmdline_assign(MAWK, string(cp)->str)) + continue; + + /* try to open it -- we used to continue on failure, + but posix says we should quit */ + if ((MAWK->main_input = mawk_file_find(MAWK, string(cp), F_IN, 1)) == NULL) { + mawk_errmsg(MAWK, errno, "cannot open %s", string(cp)->str); + mawk_exitval(MAWK, 2, NULL); + } +#warning TODO abstract this + MAWK->main_input->fin->flags |= MAWK_INPF_MAIN; + + /* success -- set FILENAME and FNR */ + mawk_cell_destroy(MAWK, FILENAME); + mawk_cellcpy(MAWK, FILENAME, cp); + free_STRING(string(cp)); + mawk_cell_destroy(MAWK, FNR); + FNR->type = C_NUM; + FNR->d.dval = MAWK_NUM_ZERO; + MAWK->rt_fnr = 0; + + return MAWK->main_input->fin; + } + /* failure */ + mawk_cell_destroy(MAWK, &argval); + + if (open_flag) { + /* all arguments were null or assignment */ + set_main_to_stdin(MAWK); + return MAWK->main_input->fin; + } + + /* real failure */ + { + /* this is how we mark EOF on main_fin */ + static mawk_input_t dead_main = { NULL, NULL, NULL, 0, 0, + MAWK_INPF_EOF | MAWK_INPF_DEAD_NO_FREE | MAWK_INPF_NO_MORE_INPUTS}; + + MAWK->main_input = mawk_file_register_nofin(MAWK, "DEAD_MAIN", F_IN, NULL); + MAWK->main_input->fin = &dead_main; + return MAWK->main_input->fin; + /* since MAWK_INPF_MAIN is not set, mawk_FINgets won't call next_main() */ + } +} + + +/* this gets called once to get the input stream going. 
+ It is called after the execution of the BEGIN block + unless there is a getline inside BEGIN {} +*/ +void mawk_FINopen_main(mawk_state_t * MAWK) +{ + mawk_cell_t argc; + + mawk_cellcpy(MAWK, &argc, ARGC); + if (argc.type != C_NUM) + mawk_cast1_to_num(MAWK, &argc); + + if (argc.d.dval == MAWK_NUM_ONE) + set_main_to_stdin(MAWK); + else + next_main(MAWK, 1); +} + +int mawk_is_cmdline_assign(mawk_state_t *MAWK, char *s) +{ + register char *p; + int c; + mawk_cell_t *cp; + unsigned len; + mawk_cell_t *fp = (mawk_cell_t *) 0; /* ditto */ + + if (MAWK->scan_code[*(unsigned char *) s] != SC_IDCHAR) + return 0; + + p = s + 1; + while ((c = MAWK->scan_code[*(unsigned char *) p]) == SC_IDCHAR || c == SC_DIGIT) + p++; + + if (*p != '=') + return 0; + + *p = 0; + + cp = mawk_create_var(MAWK, s, &fp); + if (cp == NULL) { + mawk_rt_error(MAWK, "cannot command line assign to %s\n\ttype clash or keyword", s); + } + + /* we need to keep ARGV[i] intact */ + *p++ = '='; + len = strlen(p) + 1; + /* posix says escape sequences are on from command line */ + p = mawk_rm_escape(MAWK, strcpy((char *) mawk_zmalloc(MAWK, len), p)); + cp->ptr = (PTR) mawk_new_STRING(MAWK, p); + mawk_zfree(MAWK, p, len); + mawk_check_strnum(MAWK, cp); /* sets cp->type */ + if (fp) { /* move it from cell to pfield[] */ + mawk_field_assign(MAWK, fp, cp); + free_STRING(string(cp)); + } + return 1; +} + +static char *find_sep(mawk_state_t *MAWK, int at_end, char *str, unsigned int *match_len) +{ + register char *q, *start; + + /* set up split rule (match pattern and set match length) */ + switch (MAWK->rs_shadow.type) { + case SEP_CHAR: /* single char sep */ + start = strchr(str, MAWK->rs_shadow.c); + *match_len = 1; + break; + + case SEP_STR: /* static string sep */ + *match_len = ((mawk_string_t *) MAWK->rs_shadow.ptr)->len; + start = mawk_str_str(str, ((mawk_string_t *) MAWK->rs_shadow.ptr)->str, *match_len); + break; + + /* regex or MLR sep */ + case SEP_MLR: + case SEP_RE: + start = mawk_re_pos_match(MAWK, 
str, MAWK->rs_shadow.ptr, match_len); + /* if the match is at the end, there might still be + more to match in the file */ + if (start && start[*match_len] == 0 && !at_end) + start = (char *) 0; + break; + + default: + mawk_bozo(MAWK, "type of rs_shadow"); + } + return start; +} + +/* return one input record as determined by RS, + from input file (FIN) fin +*/ +char *mawk_FINgets(mawk_state_t *MAWK, FILE_NODE *fn, unsigned *len_p) +{ + register char *p, *q; + char *sep_at; + unsigned match_len, available; + long r; + + while(!MAWK->do_exit) { /* restart */ + mawk_input_t *fin = fn->fin; + + if (fin->flags & MAWK_INPF_NO_MORE_INPUTS) + return NULL; + + /* at least try reading some more before giving up */ + fin->flags &= ~MAWK_INPF_NO_MORE; + + if ((fin->used == 0) && (fin->flags & MAWK_INPF_EOF)) { + if (fin->flags & MAWK_INPF_MAIN) { + fin = next_main(MAWK, 0); + if (fin == NULL) + return NULL; + continue; /* restart */ + } + else { + /* eof on a non-main file: no chance to get another file, report eof and exit */ + return NULL; + } + } + + /* have to retry finding a sep even if the buffer is the same partial + buffer we had last time: RS may have changed between the two calls! + The only exception is when buffer is empty for sure */ + if ((fin->used > 0) && (*fin->next != '\0')) { + sep_at = find_sep(MAWK, (fin->flags & MAWK_INPF_EOF), fin->next, &match_len); + + /* did find the separator pattern, cut string and return the from the beginning of the record */ + if (sep_at != NULL) { + char *start = fin->next; + /* the easy and normal case: found a record */ + *sep_at = 0; + *len_p = sep_at - start; + fin->next = sep_at + match_len; + if (fin->next - fin->buf >= fin->used) { + /* the buffer got empty - update things to make it faster */ + fin->next = fin->buf; + fin->used = 0; + } + *len_p = strlen(start); + return start; + } + + /* no sep, but eof... */ + if (fin->flags & MAWK_INPF_EOF) { + char *s; + /* ...last line without a record terminator! 
Return it anyway */ + *len_p = r = strlen(fin->next); + s = fin->next + r; + /* WHAT? for some reason we remove the last newline here */ + if (MAWK->rs_shadow.type == SEP_MLR && s[-1] == '\n' && r != 0) { + (*len_p)--; + *--s = 0; + } + s = fin->next; + fin->next = fin->buf; + fin->used = 0; + return s; + } + + /* didn't find a separator and we are not at the end of the file */ + if (fin->next != fin->buf) { + int new_len; + /* we are deep into the buffer, the buffer ends with a partial record. + Move it to the beginning of the buffer */ + new_len = fin->used - (fin->next - fin->buf); + if (new_len > 0) + memmove(fin->buf, fin->next, new_len); + fin->used = new_len; + fin->next = fin->buf; + } + /* ... so try to read some more data */ + } + + available = fin->alloced - fin->used; + if (available < BUFFSZ/2) { + /* have to grow */ + int next_offs = fin->next - fin->buf; + + fin->buf = mawk_zrealloc(MAWK, fin->buf, fin->alloced, fin->alloced + BUFFSZ); + fin->alloced += BUFFSZ; + available += BUFFSZ; + fin->next = fin->buf + next_offs; + } + r = mawk_fillbuff(MAWK, fin, fin->buf + fin->used, available-1, MAWK->interactive_flag); + if (r == 0) { + fin->flags |= MAWK_INPF_EOF; + continue; /* may have a next file (???might be main) */ + } + else if (r == mawk_FIN_nomore) { + /* no more to read now and we had at most a partial record in buffer */ + return (char *)mawk_FIN_nomore; + } + else if (r < 0) { + return NULL; + } + + fin->used += r; + fin->buf[fin->used] = '\0'; + + if (fin->flags & MAWK_INPF_START) { + if (MAWK->rs_shadow.type == SEP_MLR) { + char *s; + /* trim blank lines from front of file */ +#warning TODO: probably accept \r as well + for(s = fin->next; *s == '\n'; s++) ; + if (*s == '\0') { + /* emptied the buffer with all the \n's... 
so get back to initial state */ + fin->next = fin->buf; + fin->used = 0; + continue; /* restart */ + } + /* found a non-'\n', use that as a potential start of the next record */ + fin->flags &= ~MAWK_INPF_START; /* we are not at the start anymore */ + fin->next = s; + continue; /* restart: read on */ + } + else + fin->flags &= ~MAWK_INPF_START; + } + } + + /* get here if MAWK -> do_exit */ + return NULL; +} diff --git a/src/libmawk/gdb/.gdbinit b/src/libmawk/gdb/.gdbinit new file mode 100644 index 0000000..47fa685 --- /dev/null +++ b/src/libmawk/gdb/.gdbinit @@ -0,0 +1,18 @@ + +define abreak + break mawk_breakpoint if MAWK->token_lineno == $arg0 +end + +define astep + set lineno = MAWK->token_lineno + break mawk_breakpoint if MAWK->token_lineno == $lineno +end + +define awhere + call mawk_debug_where(MAWK) +end + +echo Debugging libmawk awk code\n + +abreak 2 +run diff --git a/src/libmawk/gdb/test b/src/libmawk/gdb/test new file mode 100755 index 0000000..36da814 --- /dev/null +++ b/src/libmawk/gdb/test @@ -0,0 +1 @@ +gdb --args ../lmawk -Wdebug -f test.awk diff --git a/src/libmawk/gdb/test.awk b/src/libmawk/gdb/test.awk new file mode 100644 index 0000000..e29cbed --- /dev/null +++ b/src/libmawk/gdb/test.awk @@ -0,0 +1,10 @@ +function fun1() +{ + print "q" +} + +BEGIN { + fun1(); + print 1 + print 2 +} \ No newline at end of file diff --git a/src/libmawk/hash.c b/src/libmawk/hash.c new file mode 100644 index 0000000..c9d9bc9 --- /dev/null +++ b/src/libmawk/hash.c @@ -0,0 +1,280 @@ + +/******************************************** +mawk_hash.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +#include "mawk.h" +#include "memory.h" +#include "symtype.h" +#include "cell.h" +#include + +unsigned mawk_hash(register const char *s) +{ + register unsigned h = 0; + + while (*s) + h += h + *s++; + return h; +} + +static HASHNODE *delete(mawk_state_t *, const char *); + +/* +mawk_insert a string in the symbol table. + Caller knows the symbol is not there + -- used for initialization + the name must persist (can not be free'd as long as symbol is in the table + and it won't be free'd at the end either)! +*/ +SYMTAB *mawk_insert(mawk_state_t *MAWK, const char *name) +{ + register HASHNODE *p = MAWK_ZMALLOC(MAWK, HASHNODE); + register unsigned h; + + p->link = MAWK->hash_table[h = mawk_hash(name) % HASH_PRIME]; + p->symtab.name = name; + MAWK->hash_table[h] = p; + return &p->symtab; +} + +/* Find s in the symbol table, + if not there and alloc !=0 mawk_insert it (s must be dup'ed) */ + +SYMTAB *mawk_find(mawk_state_t *MAWK, const char *s, int alloc) +{ + register HASHNODE *p; + HASHNODE *q; + unsigned h; + + p = MAWK->hash_table[h = mawk_hash(s) % HASH_PRIME]; + q = (HASHNODE *) 0; + while (1) { + if (!p) { + if (alloc) { + p = MAWK_ZMALLOC(MAWK, HASHNODE); + p->symtab.type = ST_NONE; + p->symtab.name = strcpy(mawk_zmalloc(MAWK, strlen(s) + 1), s); + break; + } + else + return NULL; + } + + if (strcmp(p->symtab.name, s) == 0) { /* found */ + if (!q) /* already at the front */ + return &p->symtab; + else { /* delete from the list */ + + q->link = p->link; + break; + } + } + + q = p; + p = p->link; + } + /* put p on front of the list */ + p->link = MAWK->hash_table[h]; + MAWK->hash_table[h] = p; + return &p->symtab; +} + + +/* remove a node from the mawk_hash table + return a ptr to the node */ + +static HASHNODE *delete(mawk_state_t *MAWK, const char *s) +{ + register HASHNODE *p; + HASHNODE *q = (HASHNODE *) 0; + unsigned h; + + p = MAWK->hash_table[MAWK->last_hash = h = mawk_hash(s) % HASH_PRIME]; + while (p) { + if 
(strcmp(p->symtab.name, s) == 0) { /* found */ + if (q) + q->link = p->link; + else + MAWK->hash_table[h] = p->link; + return p; + } + else { + q = p; + p = p->link; + } + } + +#ifdef DEBUG /* we should not ever get here */ + mawk_bozo(MAWK, "delete"); +#endif + return (HASHNODE *) 0; +} + +#ifdef MAWK_MEM_PEDANTIC +#include +static void mawk_delete_cell(mawk_state_t *MAWK, SYMTAB *stp) +{ + switch(stp->type) { + case ST_NONE: + break; + case ST_VAR: + case ST_BUILTIN: + case ST_FIELD: + if (stp->stval.cp != NULL) + mawk_cell_destroy(MAWK, stp->stval.cp); + break; + case ST_ARRAY: + mawk_array_destroy(MAWK, stp->stval.array); + break; + case ST_FUNCT: + if (stp->stval.fbp->nargs > 0) + mawk_zfree(MAWK, stp->stval.fbp->typev, stp->stval.fbp->nargs); + if (stp->stval.fbp->code != NULL) + mawk_zfree(MAWK, stp->stval.fbp->code, stp->stval.fbp->size); + MAWK_ZFREE(MAWK, stp->stval.fbp); + break; + } +/* we should decide if name was dynamically allocated (from scan)... + if ((stp->name != NULL) && (stp->name_dyna)) { + int len = strlen(stp->name) + 1; + fprintf(stderr, "FR: '%s'\n", stp->name); + mawk_zfree(MAWK, (PTR) stp->name, len); + stp->name = NULL; + }*/ +} + +#define mawk_delete_node(p) MAWK_ZFREE(MAWK, p) + +void mawk_delete(mawk_state_t *MAWK, const char *name, int cell_destroy) +{ + register HASHNODE *p = delete(MAWK, name); + if (p != NULL) { + SYMTAB *stp = &p->symtab; + if (cell_destroy) + mawk_delete_cell(MAWK, stp); + mawk_delete_node(p); + } +} +#endif + +/* store a global id on the save list, + return a ptr to the local symtab */ +SYMTAB *mawk_save_id(mawk_state_t *MAWK, const char *s) +{ + HASHNODE *p, *q; + unsigned h; + + p = delete(MAWK, s); + q = MAWK_ZMALLOC(MAWK, HASHNODE); + q->symtab.type = ST_LOCAL_NONE; + q->symtab.name = p->symtab.name; + /* put q in the mawk_hash table */ + q->link = MAWK->hash_table[h = MAWK->last_hash]; + MAWK->hash_table[h] = q; + + /* save p */ + p->link = MAWK->save_list; + MAWK->save_list = p; + + return &q->symtab; 
+} + +/* restore all global indentifiers */ +void mawk_restore_ids(mawk_state_t * MAWK) +{ + register HASHNODE *p, *q; + register unsigned h; + + q = MAWK->save_list; + MAWK->save_list = (HASHNODE *) 0; + while (q) { + p = q; + q = q->link; + mawk_zfree(MAWK, delete(MAWK, p->symtab.name), sizeof(HASHNODE)); + p->link = MAWK->hash_table[h = MAWK->last_hash]; + MAWK->hash_table[h] = p; + } +} + + +/* search the symbol table backwards for the + disassembler. This is slow -- so what +*/ + +const char *mawk_reverse_uk = "unknown"; + +const char *mawk_reverse_find(mawk_state_t *MAWK, int type, PTR ptr) +{ + mawk_cell_t *cp; + mawk_array_t array; + + int i; + HASHNODE *p; + + + switch (type) { + case ST_VAR: + case ST_FIELD: + cp = *(mawk_cell_t **) ptr; + break; + + case ST_ARRAY: + array = *(mawk_array_t *) ptr; + break; + + default: + return mawk_reverse_uk; + } + + for (i = 0; i < HASH_PRIME; i++) { + p = MAWK->hash_table[i]; + while (p) { + if (p->symtab.type == type) { + switch (type) { + case ST_VAR: + case ST_FIELD: + if (cp == p->symtab.stval.cp) + return p->symtab.name; + break; + + case ST_ARRAY: + if (array == p->symtab.stval.array) + return p->symtab.name; + break; + } + } + + p = p->link; + } + } + return mawk_reverse_uk; +} + +#ifdef MAWK_MEM_PEDANTIC +/* free global hash entries */ +void mawk_hash_clear(mawk_state_t *MAWK) +{ + register HASHNODE *p, *next; + int n; + + for(n = 0; n < HASH_PRIME; n++) { + for(p = MAWK->hash_table[n]; p != NULL; p = next) { + next = p->link; + mawk_delete_cell(MAWK, &(p->symtab)); + mawk_delete_node(p); + } + } +} +#endif diff --git a/src/libmawk/init.c b/src/libmawk/init.c new file mode 100644 index 0000000..40b598a --- /dev/null +++ b/src/libmawk/init.c @@ -0,0 +1,515 @@ + +/******************************************** +init.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. 
Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ +#include "conf.h" +#include +#include +#include +#include "mawk.h" +#include "code.h" +#include "memory.h" +#include "symtype.h" +#include "init.h" +#include "scan.h" +#include "bi_vars.h" +#include "field.h" +#include "zmalloc.h" +#include "vio.h" +#include "version.h" +#include "da_bin_helper.h" +#include "cell.h" +#include "files.h" + +static int process_cmdline(mawk_state_t *, int, char **); +static void set_ARGV(mawk_state_t *, int, char **, int); +static void bad_option(mawk_state_t *, char *); +static void no_program(mawk_state_t *); + +extern int mawk_is_cmdline_assign(mawk_state_t *, char *); + +#ifndef SET_PROGNAME +#define SET_PROGNAME() \ + {char *p = strrchr(argv[0],'/') ;\ + MAWK->progname = p ? p+1 : argv[0] ; } +#endif + +static const mawk_string_t null_str_ = { 0, 1, "" }; /* read-only */ + +static const mawk_escape_t escape_test_[ET_END + 1] = /* read-only */ +{ + {'n', '\n'}, + {'t', '\t'}, + {'f', '\f'}, + {'b', '\b'}, + {'r', '\r'}, + {'a', '\07'}, + {'v', '\013'}, + {'\\', '\\'}, + {'\"', '\"'}, + {0, 0} +}; + +mawk_state_t *mawk_initialize_alloc(void) +{ + unsigned long mpow2[NUM_CELL_TYPES] = { + 1LU<<0LU, 1LU<<1LU, 1LU<<2LU, 1LU<<3LU, 1LU<<4LU, 1LU<<5LU, 1LU<<6LU, + 1LU<<7LU, 1LU<<8LU, 1LU<<9LU, 1LU<<10LU, 1LU<<11LU, 1LU<<12LU, + 1LU<<13LU, 1LU<<14LU, 1LU<<15LU, 1LU<<16LU, 1LU<<17LU, 1LU<<18LU, + 1LU<<19LU, 1LU<<20LU, 1LU<<21LU, 1LU<<22LU, 1LU<<23LU + }; + SEPARATOR rs_ = { SEP_CHAR, '\n' }; + mawk_cell_t fs_ = { C_SPACE }; + + mawk_state_t *MAWK; + MAWK = calloc(sizeof(mawk_state_t), 1); + MAWK->Argv = mawk_array_new(MAWK, NULL); + MAWK->scripts_loaded = mawk_array_new(MAWK, NULL); + + MAWK->last_token_lineno = -1; + + MAWK->null_str = null_str_; + MAWK->argi = MAWK_NUM_ONE; + 
MAWK->current_token = -1; + MAWK->execution_start = 0; + MAWK->ps.code_move_level = 0; + MAWK->stack_base = MAWK->eval_stack; /* these can move for deep recursion */ + MAWK->stack_danger = MAWK->eval_stack + DANGER; + memcpy(MAWK->escape_test, escape_test_, sizeof(escape_test_)); + MAWK->interactive_flag = 0; + MAWK->shell = mawk_strdup_("/bin/sh"); + MAWK->max_field = MAX_SPLIT; + MAWK->rs_shadow = rs_; + MAWK->fs_shadow = fs_; + memcpy(MAWK->scan_code, mawk_scan_code, sizeof(mawk_scan_code)); + + /* this can be moved and enlarged by -W sprintf=num */ + MAWK->sprintf_buff = string_buff; + MAWK->sprintf_limit = string_buff + sizeof(MAWK->tempbuff); + MAWK->mpow2 = malloc(sizeof(mpow2)); + memcpy(MAWK->mpow2, mpow2, sizeof(mpow2)); + MAWK->fbank[0] = MAWK->field; + + mawk_bi_vars_init(MAWK); /* load the builtin variables */ + mawk_bi_funct_init(MAWK); /* load the builtin functions */ +#ifndef MAWK_NO_COMP + mawk_kw_init(MAWK); /* load the keywords */ +#endif + mawk_field_init(MAWK); + mawk_fpe_init(); + return MAWK; +} + + +mawk_state_t *mawk_initialize_argv(mawk_state_t *MAWK, int argc, char **argv) +{ + SET_PROGNAME(); + + if (!process_cmdline(MAWK, argc, argv)) + return NULL; + + return MAWK; +} + + +mawk_state_t *mawk_initialize(int argc, char **argv, mawk_vio_init_t vio_init) +{ + mawk_state_t *MAWK, *MAWK2; + MAWK = mawk_initialize_alloc(); + if (MAWK == NULL) + return NULL; + + MAWK->vio_init = vio_init; + + MAWK2 = mawk_initialize_argv(MAWK, argc, argv); + if (MAWK2 == NULL) { + /* TODO: free MAWK */ + return NULL; + } + mawk_code_init(MAWK2); + return MAWK2; +} + + +void mawk_hash_clear(mawk_state_t *MAWK); + +void mawk_uninitialize(mawk_state_t * m) +{ + FBLOCK *fb, *fbn; + +#ifndef MAWK_NO_COMP +#ifdef MAWK_MEM_PEDANTIC + mawk_kw_uninit(m); +#endif +#endif + +#ifdef MAWK_MEM_PEDANTIC +mawk_bi_funct_uninit(m); +#endif + + /* free data of c calls compiled into the script */ + for(fb = m->c_funcs; fb != NULL; fb = fbn) { + fbn = fb->c_next; + free((char 
*)fb->name); + free(fb); + } + + if (m->shell != NULL) + free(m->shell); + if (m->mpow2 != NULL) + free(m->mpow2); + +#ifdef MAWK_MEM_PEDANTIC + mawk_bi_vars_uninit(m); + mawk_field_uninit(m); +#endif + + if (m->mawk_parser_stack != NULL) + free(m->mawk_parser_stack); + + if (m->ps.buffer != NULL) + mawk_zfree(m, m->ps.buffer, BUFFSZ + 1); + + if (m->scripts_loaded != NULL) + mawk_array_destroy(m, m->scripts_loaded); + +#ifdef MAWK_MEM_PEDANTIC + if (m->begin_start != NULL) + mawk_zfree(m, m->begin_start, m->begin_size); + + if (m->main_start != NULL) + mawk_zfree(m, m->main_start, m->main_size); + + if (m->end_start_orig != NULL) + mawk_zfree(m, m->end_start_orig, m->end_size); + + { + struct mawk_fdump *fdl, *next; + for(fdl = m->fdump_list; fdl != NULL; fdl = next) { + next = fdl->link; + mawk_delete(m, fdl->fbp->name, 1); + MAWK_ZFREE(m, fdl); + } + } + + /* free global variables */ + mawk_hash_clear(m); +#endif + + /* close and free all files */ + mawk_file_uninit(m); + + mawk_free_all(m); + free(m); +} + +static void bad_option(mawk_state_t *MAWK, char *s) +{ + mawk_errmsg(MAWK, 0, "not an option: %s", s); + mawk_exit(MAWK, 2); +} + +static void no_program(mawk_state_t * MAWK) +{ + mawk_exit(MAWK, 0); +} + +void mawk_append_input_file(mawk_state_t * MAWK, const char *fn, int bytecode) +{ + /* first file goes in pfile_name ; any more go on a list */ + if (MAWK->ps.pfile_name) { + if (MAWK->pfile_list_tail == NULL) { + MAWK->pfile_list_tail = MAWK_ZMALLOC(MAWK, PFILE); + MAWK->pfile_list_tail->fname = fn; + MAWK->pfile_list_tail->bytecode = bytecode; + MAWK->pfile_list = MAWK->pfile_list_tail; + } + else { + MAWK->pfile_list_tail->link = MAWK_ZMALLOC(MAWK, PFILE); + MAWK->pfile_list_tail = MAWK->pfile_list_tail->link; + MAWK->pfile_list_tail->fname = NULL; + MAWK->pfile_list_tail->bytecode = 0; + } + MAWK->pfile_list_tail->link = NULL; + } + else { + MAWK->ps.pfile_name = fn; + MAWK->ps.pfile_bytecode = bytecode; + } +} + +#ifdef MAWK_NO_COMP +/* load a binary 
script; return 1 on success, 0 on failure */ +static int mawk_load_bin(mawk_state_t *MAWK, const char *path) +{ + if (mawk_load_code_bin(MAWK, path) != 0) { + mawk_errmsg(MAWK, 0, "failed to load or link binary script %s", path); + mawk_exitval(MAWK, 2, -1); + return 0; + } + return 1; +} + +/* load all binary scripts; return 1 on success, 0 on failure */ +static int mawk_load_bins(mawk_state_t *MAWK) +{ + PFILE *p; + + if (MAWK->ps.pfile_name != NULL) + if (!mawk_load_bin(MAWK, MAWK->ps.pfile_name)) + return 0; + + for(p = MAWK->pfile_list; p != NULL; p = p->link) { + if (!mawk_load_bin(MAWK, p->fname)) + return 0; + } + return 1; +} +#endif + +/* not a real implementation, just enough for our needs */ +static int mawk_strcasecmp(const char *s1, const char *s2) +{ + for(;;) { + if (tolower(*s1) != tolower(*s2)) + return s2 - s1; + if (*s1 == '\0') + return 0; + s1++; + s2++; + } +} + +static int process_cmdline(mawk_state_t *MAWK, int argc, char **argv) +{ + int i, nextarg; + char *optarg; + MAWK->pfile_list = NULL; + MAWK->pfile_list_tail = NULL; + + for (i = 1; i < argc && argv[i][0] == '-'; i = nextarg) { + if (argv[i][1] == 0) { /* - alone */ + if (!MAWK->ps.pfile_name) { + no_program(MAWK); + return 0; + } + break; /* the for loop */ + } + /* safe to look at argv[i][2] */ + + if (argv[i][2] == 0) { + if (i == argc - 1 && argv[i][1] != '-') { + if (strchr("WFvf", argv[i][1])) { + mawk_errmsg(MAWK, 0, "option %s lacks argument", argv[i]); + mawk_exit_(MAWK, 2); + return 0; + } + bad_option(MAWK, argv[i]); + } + + optarg = argv[i + 1]; + nextarg = i + 2; + } + else { /* argument glued to option */ + + optarg = &argv[i][2]; + nextarg = i + 1; + } + + switch (argv[i][1]) { + case 'W': + + if (optarg[0] >= 'a' && optarg[0] <= 'z') + optarg[0] += 'A' - 'a'; + if (optarg[0] == 'V') + mawk_print_version(MAWK); + else if (optarg[0] == 'C') + MAWK->dump_code_flag = 2; + else if (optarg[0] == 'D') { + if (mawk_strcasecmp(optarg, "DUMP") == 0) + MAWK->dump_code_flag = 1; 
+ else if (mawk_strcasecmp(optarg, "DUMPSYM") == 0) + MAWK->dump_sym_flag = 1; + else if (mawk_strcasecmp(optarg, "DEBUG") == 0) + MAWK->debug_symbols = 1; + } + else if (optarg[0] == 'S') { + char *p = strchr(optarg, '='); + int x = p ? atoi(p + 1) : 0; + + if (x > SPRINTF_SZ) { + MAWK->sprintf_buff = (char *) mawk_zmalloc(MAWK, x); + MAWK->sprintf_limit = MAWK->sprintf_buff + x; + } + } + else if (optarg[0] == 'P') { + MAWK->posix_space_flag = 1; + } + else if (optarg[0] == 'E') { + if (MAWK->ps.pfile_name) { + mawk_errmsg(MAWK, 0, "-W exec is incompatible with -f"); + mawk_exit_(MAWK, 2); + return 0; + } + else if (nextarg == argc) + no_program(MAWK); + + MAWK->ps.pfile_name = argv[nextarg]; + MAWK->ps.pfile_bytecode = 0; + i = nextarg + 1; + goto no_more_opts; + } + else if (optarg[0] == 'I') { + MAWK->interactive_flag = 1; + } + else if (strncmp(optarg, "Maxmem=", 7) == 0) { + char *end; + MAWK->mm_max = strtol(optarg+7, &end, 10); + switch(*end) { + case '\0': + break; + case 'k': + case 'K': + MAWK->mm_max *= 1024; + break; + case 'm': + case 'M': + MAWK->mm_max *= 1024 * 1024; + break; + default: + MAWK->mm_max = 0; + mawk_errmsg(MAWK, 0, "invalid memory size for -Wmaxmem (must be integer with optional K or M suffix): '%s'", optarg+7); + } + } + else + mawk_errmsg(MAWK, 0, "vacuous option: -W %s", optarg); + + + break; + + case 'v': +#ifdef MAWK_NO_EXEC + mawk_errmsg(MAWK, 0, "Compiler-only version of mawk can not set runtime variables with -v"); + mawk_exit_(MAWK, 2); + return 0; +#else + if (!mawk_is_cmdline_assign(MAWK, optarg)) { + mawk_errmsg(MAWK, 0, "improper assignment: -v %s", optarg); + mawk_exit_(MAWK, 2); + return 0; + } +#endif + break; + + case 'F': + + mawk_rm_escape(MAWK, optarg); /* recognize escape sequences */ + mawk_cell_destroy(MAWK, MAWK_FS); + MAWK_FS->type = C_STRING; + MAWK_FS->ptr = (PTR) mawk_new_STRING(MAWK, optarg); + mawk_cellcpy(MAWK, &MAWK->fs_shadow, MAWK_FS); + mawk_cast_for_split(MAWK, &MAWK->fs_shadow); + break; + + case 
'-': + if (argv[i][2] != 0) + bad_option(MAWK, argv[i]); + i++; + goto no_more_opts; +#ifndef MAWK_NO_COMP + case 'f': + mawk_append_input_file(MAWK, optarg, 0); + break; +#endif + case 'b': + mawk_append_input_file(MAWK, optarg, 1); + break; + default: + bad_option(MAWK, argv[i]); + } + } + +no_more_opts: + if (MAWK->ps.pfile_name) { /* program from -f or -b */ + set_ARGV(MAWK, argc, argv, i); +#ifdef MAWK_NO_COMP + mawk_load_bins(MAWK); +#else + mawk_scan_init(MAWK, (char *) 0); +#endif + if (MAWK->do_exit) + return 0; + } + else { /* program given on command line (no -f or -b) */ + + if (i == argc) { + if (!MAWK->no_program_ok) { + no_program(MAWK); + return 0; + } + } + set_ARGV(MAWK, argc, argv, i + 1); +#ifndef MAWK_NO_COMP + if (i != argc) + mawk_scan_init(MAWK, argv[i]); + else + mawk_scan_init(MAWK, "BEGIN {}"); +#else + bad_option(MAWK, argv[i]); + return 1; +#endif + if (MAWK->do_exit) + return 0; + +/* #endif */ + } + return 1; +} + + +static void set_ARGV(mawk_state_t *MAWK, int argc, char **argv, int i) +{ + /* argv[i] = ARGV[i] */ + SYMTAB *st_p; + mawk_cell_t idx, cl; + + st_p = mawk_insert(MAWK, "ARGV"); + st_p->type = ST_ARRAY; + st_p->stval.array = MAWK->Argv; + + /* store progran name in ARGV[] */ + idx.type = C_NUM; + idx.d.dval = MAWK_NUM_ZERO; + cl.type = C_STRING; + cl.ptr = (PTR) mawk_new_STRING(MAWK, MAWK->progname); + mawk_array_set(MAWK, st_p->stval.array, &idx, &cl); + free_STRING((mawk_string_t *)cl.ptr); + + /* ARGV[0] is set, do the rest + The type of ARGV[1] ... 
should be C_MBSTRN + because the user might enter numbers from the command line */ + + for (idx.d.dval = MAWK_NUM_ONE; i < argc; i++, idx.d.dval += MAWK_NUM_ONE) { + + cl.type = C_MBSTRN; + cl.ptr = (PTR) mawk_new_STRING(MAWK, argv[i]); + mawk_array_set(MAWK, st_p->stval.array, &idx, &cl); + free_STRING((mawk_string_t *)cl.ptr); + } + ARGC->type = C_NUM; + ARGC->d.dval = idx.d.dval; +} + + + diff --git a/src/libmawk/init.h b/src/libmawk/init.h new file mode 100644 index 0000000..040ebd6 --- /dev/null +++ b/src/libmawk/init.h @@ -0,0 +1,63 @@ + +/******************************************** +init.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#ifndef INIT_H +#define INIT_H + +#include + +/* nodes to link file names for multiple + -f option */ + +extern char *sprintf_buff, *sprintf_limit; + + +/* high levelinit: all 3 steps in order: */ +mawk_state_t *mawk_initialize(int argc, char **argv, mawk_vio_init_t vio_init); + +/* low level init, step 1: allocate the context and init constants*/ +mawk_state_t *mawk_initialize_alloc(void); + +/* set up vio and builtins here */ + +/* low level init, step 2: set up args and load scripts */ +mawk_state_t *mawk_initialize_argv(mawk_state_t *MAWK, int argc, char **argv); + +/* low level init, step 3: initialize the code */ +void code_init(mawk_state_t *); + + + +void mawk_uninitialize(mawk_state_t * m); +void code_cleanup(void); +void compile_cleanup(void); +int mawk_scan_init(mawk_state_t *, char *); +void bi_vars_init(mawk_state_t * MAWK); +void bi_funct_init(mawk_state_t *); +void print_init(void); +void mawk_kw_init(mawk_state_t * MAWK); +#ifdef MAWK_MEM_PEDANTIC +void 
mawk_kw_uninit(mawk_state_t * MAWK); +#endif +void mawk_field_init(mawk_state_t *); +void mawk_fpe_init(void); +void mawk_set_stderr(mawk_state_t * MAWK); +void mawk_append_input_file(mawk_state_t * MAWK, const char *fn, int bytecode); + +int mawk_is_cmdline_assign(mawk_state_t *, char *); + + +#endif /* INIT_H */ diff --git a/src/libmawk/init_nocomp.c b/src/libmawk/init_nocomp.c new file mode 100644 index 0000000..02692b1 --- /dev/null +++ b/src/libmawk/init_nocomp.c @@ -0,0 +1,2 @@ +#define MAWK_NO_COMP +#include "init.c" diff --git a/src/libmawk/jmp.c b/src/libmawk/jmp.c new file mode 100644 index 0000000..07f4297 --- /dev/null +++ b/src/libmawk/jmp.c @@ -0,0 +1,208 @@ + +/******************************************** +jmp.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +/* this module deals with back patching jumps, breaks and continues, + and with save and restoring code when we move code. + There are three stacks. 
If we encounter a compile error, the
+   stacks are frozen, i.e., we do not attempt error recovery
+   on the stacks
+*/
+
+
+#include "mawk.h"
+#include "symtype.h"
+#include "jmp.h"
+#include "code.h"
+#include "sizes.h"
+#include "init.h"
+#include "memory.h"
+
+/* true once at least one compile error was counted; every function below
+   tests it before touching its stack */
+#define error_state (MAWK->compile_error_count>0)
+
+
+/*---------- back patching jumps ---------------*/
+
+/* Emit a two-slot jump of type jtype.
+   With a known target the operand is the distance from the operand slot
+   (mawk_code_ptr + 1) to target. With target == NULL the operand is
+   emitted as 0 and a JMP record holding the operand's offset is pushed on
+   MAWK->jmp_top, to be resolved later by mawk_patch_jmp(). */
+void mawk_code_jmp(mawk_state_t *MAWK, int jtype, INST *target)
+{
+	if (error_state)
+		return;
+
+	/* WARNING: Don't emit any code before using target or
+	   relocation might make it invalid */
+
+	if (target)
+		mawk_code2op(MAWK, jtype, target - (mawk_code_ptr + 1));
+	else {
+		register JMP *p = MAWK_ZMALLOC(MAWK, JMP);
+
+		/* stack for back patch */
+		mawk_code2op(MAWK, jtype, 0);
+		/* offset - 1: the operand slot that was just emitted (presumably
+		   mawk_code_offset is the offset of the next free slot - confirm) */
+		p->source_offset = mawk_code_offset - 1;
+		p->link = MAWK->jmp_top;
+		MAWK->jmp_top = p;
+	}
+}
+
+/* Pop the top record of the jmp stack and store the distance from the
+   recorded operand slot to target in that slot. Does nothing in error
+   state. NOTE(review): an empty stack is only diagnosed when DEBUG is
+   defined; otherwise MAWK->jmp_top is dereferenced unchecked. */
+void mawk_patch_jmp(mawk_state_t *MAWK, INST *target) /* patch a jump on the jmp_stack */
+{
+	register JMP *p;
+	register INST *source; /* jmp starts here */
+
+	if (!error_state) {
+#ifdef DEBUG
+		if (!MAWK->jmp_top)
+			mawk_bozo(MAWK, "jmp stack underflow");
+#endif
+
+		p = MAWK->jmp_top;
+		MAWK->jmp_top = p->link;
+		/* the record stores an offset, not a pointer, so it is resolved
+		   against the current mawk_code_base */
+		source = p->source_offset + mawk_code_base;
+		source->op = target - source;
+
+		MAWK_ZFREE(MAWK, p);
+	}
+}
+
+
+/*-- break and continue -------*/
+
+/* Push a mark node (type 0) on the BC stack; mawk_BC_clear() pops down to it. */
+void mawk_BC_new(mawk_state_t * MAWK)
+{ /* mark the start of a loop */
+	mawk_BC_insert(MAWK, 0, (INST *) 0);
+}
+
+/* Push a BC record. type is 0 for a loop mark, 'B' for break, anything
+   else is reported as continue; address is the slot to patch later.
+   A non-zero type with an empty BC stack raises a compile error instead.
+   The source_offset of a mark node is computed from a NULL address and is
+   never read by mawk_BC_clear(). */
+void mawk_BC_insert(mawk_state_t *MAWK, int type, INST *address)
+{
+	register BC *p;
+
+	if (error_state)
+		return;
+
+	if (type && !MAWK->bc_top) {
+		mawk_compile_error(MAWK, "%s statement outside of loop", type == 'B' ? "break" : "continue");
+
+		return;
+	}
+	else {
+		p = MAWK_ZMALLOC(MAWK, BC);
+		p->type = type;
+		p->source_offset = address - mawk_code_base;
+		p->link = MAWK->bc_top;
+		MAWK->bc_top = p;
+	}
+}
+
+
+/* patch all break and continues for one loop:
+   every record above the mark node gets the distance to B_address ('B'
+   records) or C_address (all other records) written into its slot, then
+   the records and the mark node are freed. Does nothing in error state.
+   NOTE(review): assumes a mark node is on the stack; there is no NULL check. */
+void mawk_BC_clear(mawk_state_t *MAWK, INST *B_address, INST *C_address)
+{
+	register BC *p, *q;
+	INST *source;
+
+	if (error_state)
+		return;
+
+	p = MAWK->bc_top;
+	/* pop down to the mark node */
+	while (p->type) {
+		source = mawk_code_base + p->source_offset;
+		source->op = (p->type == 'B' ? B_address : C_address)
+			- source;
+
+		q = p;
+		p = p->link;
+		MAWK_ZFREE(MAWK, q);
+	}
+	/* remove the mark node */
+	MAWK->bc_top = p->link;
+	MAWK_ZFREE(MAWK, p);
+}
+
+/*----- moving code --------------------------*/
+
+#define NO_SCOPE -1
+	/* means relocation of resolve list not needed */
+
+/* Save a private copy of len instructions starting at code on the mc stack.
+   When MAWK->resolve_list is set, scope, the current code_move_level, fbp
+   and the offset of code within the code area are recorded as well so that
+   mawk_code_pop() can relocate the resolve list; otherwise the record is
+   tagged NO_SCOPE. Nothing is pushed in error state, but
+   ps.code_move_level is incremented in every case. */
+void mawk_code_push(mawk_state_t *MAWK, INST *code, unsigned len, int scope, FBLOCK *fbp)
+{
+	register MC *p;
+
+	if (!error_state) {
+		p = MAWK_ZMALLOC(MAWK, MC);
+		p->len = len;
+		p->link = MAWK->mc_top;
+		MAWK->mc_top = p;
+
+		/* p->code is left unset for len == 0; mawk_code_pop() only reads
+		   it when len is non-zero */
+		if (len) {
+			p->code = (INST *) mawk_zmalloc(MAWK, sizeof(INST) * len);
+			memcpy(p->code, code, sizeof(INST) * len);
+		}
+		if (!MAWK->resolve_list)
+			p->scope = NO_SCOPE;
+		else {
+			p->scope = scope;
+			p->move_level = MAWK->ps.code_move_level;
+			p->fbp = fbp;
+			if (code != NULL)
+				p->offset = code - mawk_code_base;
+			else
+				p->offset = 0;
+		}
+	}
+	MAWK->ps.code_move_level++;
+}
+
+/* copy the code at the top of the mc stack to target.
+   return the number of INSTs moved
+   In error state 0 is returned at once: the stack is left alone and
+   ps.code_move_level is not decremented. */
+
+unsigned mawk_code_pop(mawk_state_t *MAWK, INST *target)
+{
+	register MC *p;
+	unsigned len;
+	int target_offset;
+
+	if (error_state)
+		return 0;
+
+#ifdef DEBUG
+	if (!MAWK->mc_top)
+		mawk_bozo(MAWK, "mc underflow");
+#endif
+
+	p = MAWK->mc_top;
+	MAWK->mc_top = p->link;
+	len = p->len;
+
+	/* grow the code area until the copy fits below mawk_code_warn; target
+	   is re-derived from its offset after each grow */
+	while (target + len >= mawk_code_warn) {
+		target_offset = target - mawk_code_base;
+		mawk_code_grow(MAWK);
+		target = mawk_code_base + target_offset;
+	}
+
+	if (len) {
+		memcpy(target, p->code, len * sizeof(INST));
+		mawk_zfree(MAWK, p->code, len * sizeof(INST));
+	}
+
+	if (p->scope != NO_SCOPE) {
+		target_offset = target - mawk_code_base;
+		/* last argument: how far the code moved from where it was saved */
+		mawk_relocate_resolve_list(MAWK, p->scope, p->move_level, p->fbp, p->offset, len, target_offset - p->offset);
+	}
+
+	MAWK_ZFREE(MAWK, p);
+	MAWK->ps.code_move_level--;
+	return len;
+}
diff --git a/src/libmawk/jmp.h b/src/libmawk/jmp.h
new file mode 100644
index 0000000..41d0834
--- /dev/null
+++ b/src/libmawk/jmp.h
@@ -0,0 +1,28 @@
+
+/********************************************
+jmp.h
+
+libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
+based on mawk code coming with the below copyright:
+
+copyright 1991, Michael D. Brennan
+
+This is a source file for mawk, an implementation of
+the AWK programming language.
+
+Mawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/
+
+#ifndef JMP_H
+#define JMP_H
+
+void mawk_BC_new(mawk_state_t *); /* push a loop mark on the break/continue stack */
+void mawk_BC_insert(mawk_state_t *, int, INST *); /* push a record: type (0 mark, 'B' break, else continue), slot to patch */
+void mawk_BC_clear(mawk_state_t *, INST *, INST *); /* patch and pop down to the mark: break address, continue address */
+void mawk_code_push(mawk_state_t *, INST *, unsigned, int, FBLOCK *); /* save code: start, length in INSTs, scope, function block */
+unsigned mawk_code_pop(mawk_state_t *, INST *); /* copy the saved code to the target; returns the number of INSTs moved */
+void mawk_code_jmp(mawk_state_t *, int, INST *); /* emit a jump: jump type, target (NULL: patch later) */
+void mawk_patch_jmp(mawk_state_t *, INST *); /* resolve the most recent unpatched jump to the target */
+
+#endif /* JMP_H */
diff --git a/src/libmawk/kw.c b/src/libmawk/kw.c
new file mode 100644
index 0000000..2e4bb55
--- /dev/null
+++ b/src/libmawk/kw.c
@@ -0,0 +1,88 @@
+
+/********************************************
+kw.c
+
+libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
+based on mawk code coming with the below copyright:
+
+copyright 1991, Michael D. Brennan
+
+This is a source file for mawk, an implementation of
+the AWK programming language.
+
+Mawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/ + +#include "mawk.h" +#include "symtype.h" +#include "parse.h" +#include "init.h" + +static const struct kw { + char *text; + short kw; +} keywords[] = { /* read-only */ + {"print", PRINT}, + {"printf", PRINTF}, + {"do", DO}, + {"while", WHILE}, + {"for", FOR}, + {"break", BREAK}, + {"continue", CONTINUE}, + {"if", IF}, + {"else", ELSE}, + {"in", IN}, + {"delete", DELETE}, + {"split", SPLIT}, + {"match", MATCH_FUNC}, + {"BEGIN", BEGIN}, + {"END", END}, + {"include", INCLUDE}, + {"exit", EXIT}, + {"next", NEXT}, + {"return", RETURN}, + {"getline", GETLINE}, + {"sub", SUB}, + {"gsub", GSUB}, + {"function", FUNCTION}, + {NULL, 0} +}; + +/* put keywords in the symbol table */ +void mawk_kw_init(mawk_state_t * MAWK) +{ + register const struct kw *p = keywords; + register SYMTAB *q; + + while (p->text) { + q = mawk_insert(MAWK, p->text); + q->type = ST_KEYWORD; + q->stval.kw = p++->kw; + } +} + +#ifdef MAWK_MEM_PEDANTIC +void mawk_kw_uninit(mawk_state_t * MAWK) +{ + register const struct kw *p = keywords; + register SYMTAB *q; + + while (p->text) { + mawk_delete(MAWK, p->text, 0); + p++; + } +} +#endif + +/* mawk_find a keyword to emit an error message */ +const char *mawk_find_kw_str(int kw_token) +{ + const struct kw *p; + + for (p = keywords; p->text; p++) + if (p->kw == kw_token) + return p->text; + /* search failed */ + return (char *) 0; +} diff --git a/src/libmawk/libmawk.c b/src/libmawk/libmawk.c new file mode 100644 index 0000000..0abefe4 --- /dev/null +++ b/src/libmawk/libmawk.c @@ -0,0 +1,713 @@ +/******************************************** +libmawk (C) 2009-2014, Tibor 'Igor2' Palinkas; + +This is a source file for libmawk, an implementation of +the AWK programming language, fork of mawk. + +Libmawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ +#include +#include +#include +#include +#include "libmawk.h" +#include "debug.h" +#include "memory.h" +#include "sizes.h" +#include "array.h" +#include "fin.h" +#include "num.h" +#include "vio.h" +#include "vars.h" +#include "vio.h" +#include "vio_fifo.h" +#include "vio_orig.h" + +mawk_state_t *libmawk_initialize_stage1(void) +{ + mawk_state_t *m; + + m = mawk_initialize_alloc(); + + m->separate_begin = 1; + m->suppress_undefined_function_warning = 1; + m->no_program_ok = 1; + return m; +} + +mawk_state_t *libmawk_initialize_stage2(mawk_state_t *m, int argc, char *argv[]) +{ + mawk_state_t *m2; + char *argv_dummy[] = { + "(null)", + "-f", + "/dev/null", + NULL + }; + + if (argv == NULL) { + argc = 3; + argv = argv_dummy; + } + + /* load and parse */ + m2 = mawk_initialize_argv(m, argc, argv); + + if (m2 == NULL) { +#warning TODO: cleanup m + return NULL; + } + m = m2; + + mawk_code_init(m); + + /* if the app failed to load scripts from argv[], add a dummy + empty BEGIN - if later the app feeds in some script, it will + be appended to the empty BEGIN, but there won't be any side + effect of this move. 
*/ + if (*m->ps.buffp == '\0') + strcpy((char *)m->ps.buffp, "BEGIN {}\n"); + + mawk_parse(m); + + return m; +} + +mawk_state_t *libmawk_initialize_stage3(mawk_state_t *m) +{ + + if (m->execution_start == NULL) + return NULL; + + m->mawk_state = EXECUTION; + + if (m->debug_symbols) + mawk_debug_callstack_push(m, &mawk_debug_begin); + + mawk_execute(m, m->execution_start, m->eval_stack - 1, 0) ; + + if (m->debug_symbols) + mawk_debug_callstack_pop(m); + + return m; +} + +int libmawk_initialize_stdio(mawk_state_t *m, int stdin_apps, int stdout_apps, int stderr_apps) +{ + mawk_vio_t *vf; + + mawk_vio_orig_setup_stdio(m, stdin_apps, stdout_apps, stderr_apps); + + /* stdin is a fifo */ + if (!stdin_apps) { + vf = mawk_vio_fifo_open(m, NULL, MAWK_VIO_I); + mawk_file_register(m, "/dev/stdin", F_IN, vf); + } + + if (!stdout_apps) { + vf = mawk_vio_fifo_open(m, NULL, MAWK_VIO_O_APPEND); + mawk_file_register(m, "/dev/stdout", F_APPEND, vf); + } + + if (!stderr_apps) { + vf = mawk_vio_fifo_open(m, NULL, MAWK_VIO_O_APPEND); + mawk_file_register(m, "/dev/stderr", F_APPEND, vf); + } + + /* file operation is handled by the orig vio */ + m->vio_init = mawk_vio_orig_init; + return 0; +} + +mawk_state_t *libmawk_initialize(int argc, char *argv[]) +{ + mawk_state_t *m; + + m = libmawk_initialize_stage1(); + + if (m == NULL) + return NULL; + + /* stdout and stderr are bound to the process' stdout and stderr */ + libmawk_initialize_stdio(m, 0, 1, 1); + + m = libmawk_initialize_stage2(m, argc, argv); + if (m != NULL) + m = libmawk_initialize_stage3(m); + + return m; +} + + +void libmawk_run_main(mawk_state_t *m) +{ + /* don't run empty main */ + if (m->main_start == NULL) + return; + + if (m->debug_symbols) + mawk_debug_callstack_push(m, &mawk_debug_main); + + mawk_execute(m, m->main_start, m->eval_stack - 1, 0); + + if (m->debug_symbols) + mawk_debug_callstack_pop(m); +} + +void libmawk_uninitialize_stage1(mawk_state_t *m) +{ + INST exit0 = {_EXIT0}; + mawk_execute(m, &exit0, 
m->eval_stack - 1, 0); +} + +void libmawk_uninitialize_stage2(mawk_state_t *m) +{ + libmawk_close_input(m); + + if (m->main_input != NULL) + mawk_file_close_(m, m->main_input); + + mawk_uninitialize(m); +} + + +void libmawk_uninitialize(mawk_state_t *m) +{ + libmawk_uninitialize_stage1(m); + libmawk_uninitialize_stage2(m); +} + +/* callback from mawk */ +void mawk_exit_(mawk_state_t *MAWK, int x) +{ + /* we do not really exit */ + MAWK->final_exit_code = MAWK->rt_exit_code != 0 ? MAWK->rt_exit_code : x; + MAWK->wants_to_exit = 1; +} + +int libmawk_append_ninput(mawk_state_t *m, const char *input, int len) +{ + mawk_vio_t *vf; + vf = m->fnode_stdin->vf; + if (vf == NULL) + return -1; + return mawk_vio_fifo_write_app(m, vf, input, len); +} + +int libmawk_append_input(mawk_state_t *m, const char *input_str) +{ + return libmawk_append_ninput(m, input_str, strlen(input_str)); +} + +int libmawk_close_input(mawk_state_t *m) +{ + mawk_vio_t *vf; + if (m->fnode_stdin == NULL) + return -1; + vf = m->fnode_stdin->vf; + if (vf == NULL) + return -1; + if (vf->imp->vclose == mawk_vio_fifo_imp.vclose) + mawk_vio_fifo_eof_from_app(m, vf); + return 0; +} + +mawk_cell_t *libmawk_set_cellv(mawk_state_t *MAWK, mawk_cell_t *cell, const char argtype, va_list *ap) +{ + char *s; + int i; + mawk_num_t d; + + switch(argtype) { + case 's': + s = va_arg((*ap), char *); + cell->type = C_STRING ; + cell->ptr = mawk_new_STRING(MAWK, s); + break; + case 'd': + i = va_arg((*ap), int); + cell->type = C_NUM ; + cell->d.dval = i; + break; +#ifndef MAWK_NO_FLOAT + case 'f': + d = va_arg((*ap), double); + cell->type = C_NUM; + cell->d.dval = d; + break; +#endif + default: + return NULL; + } + return cell; +} + +mawk_cell_t *libmawk_set_cellp(mawk_state_t *MAWK, mawk_cell_t *cell, const char argtype, void *argp) +{ + char *s; + int i; + mawk_num_t d; + + switch(argtype) { + case 's': + s = (char *)argp; + cell->type = C_STRING ; + cell->ptr = mawk_new_STRING(MAWK, s); + break; + case 'd': + i = *(int 
*)argp; + cell->type = C_NUM ; + cell->d.dval = i; + break; +#ifndef MAWK_NO_FLOAT + case 'f': + d = *(double *)argp; + cell->type = C_NUM ; + cell->d.dval = d; + break; +#endif + default: + return NULL; + } + return cell; +} + +mawk_cell_t *libmawk_set_cell(mawk_state_t *MAWK, mawk_cell_t *cell, const char argtype, ...) +{ + va_list ap; + mawk_cell_t *ret; + + va_start(ap, argtype); + ret = libmawk_set_cellv(MAWK, cell, argtype, &ap); + va_end(ap); + return ret; +} + + +mawk_exec_result_t libmawk_call_function(mawk_state_t *MAWK, const char *fname, mawk_cell_t *retc, const char *argtypes, ...) +{ + va_list ap; + int numargs; + SYMTAB *fs; + mawk_cell_t *ret, *tmp; + mawk_cell_t *orig_sp; + FBLOCK *fbp; + FBLOCK fbp_c; + + if (retc != NULL) + mawk_cell_destroy(MAWK, retc); + + fs = mawk_find(MAWK, fname, 0); + if ((fs == NULL) || ((fs->type != ST_FUNCT) && (fs->type != ST_C_FUNCTION))) { + /* does not exist or not a function */ + return -1; + } + + if (fs->type == ST_C_FUNCTION) { + fbp = &fbp_c; + fbp_c.name = fs->name; + fbp_c.code = NULL; + } + else + fbp = fs->stval.fbp; + +#warning TODO: check if we need to grow the stack + + orig_sp = MAWK->sp; + va_start(ap, argtypes); + for(numargs = 0;*argtypes != '\0';argtypes++,numargs++) { + inc_mawksp(); + + if (libmawk_set_cellv(MAWK, MAWK->sp, *argtypes, &ap) == NULL) + goto err_cleanup; + } + va_end(ap); + return mawk_call(MAWK, fbp, numargs, retc); + + err_cleanup:; + va_end(ap); + for(MAWK->sp--; MAWK->sp > orig_sp; MAWK->sp--) + mawk_cell_destroy(MAWK, MAWK->sp); + + MAWK->sp = orig_sp; + return -1; +} + +mawk_exec_result_t libmawk_call_functionp(mawk_state_t *MAWK, const char *fname, mawk_cell_t *retc, const char *argtypes, void **args) +{ + int numargs; + SYMTAB *fs; + mawk_cell_t *ret, *tmp; + mawk_cell_t *orig_sp; + FBLOCK *fbp; + FBLOCK fbp_c; + + if (retc != NULL) + mawk_cell_destroy(MAWK, retc); + + fs = mawk_find(MAWK, fname, 0); + if ((fs == NULL) || ((fs->type != ST_FUNCT) && (fs->type != 
ST_C_FUNCTION))) { + /* does not exist or not a function */ + return -1; + } + + if (fs->type == ST_C_FUNCTION) { + fbp = &fbp_c; + fbp_c.name = fs->name; + fbp_c.code = NULL; + } + else + fbp = fs->stval.fbp; + +#warning TODO: check if we need to grow the stack + + orig_sp = MAWK->sp; + for(numargs = 0;*argtypes != '\0';argtypes++,numargs++,args++) { + inc_mawksp(); + if (libmawk_set_cellp(MAWK, MAWK->sp, *argtypes, *args) == NULL) + goto err_cleanup; + } + return mawk_call(MAWK, fs->stval.fbp, numargs, retc); + + err_cleanup:; + for(MAWK->sp--; MAWK->sp > orig_sp; MAWK->sp--) + mawk_cell_destroy(MAWK, MAWK->sp); + + MAWK->sp = orig_sp; + return -1; + +} + +mawk_exec_result_t libmawk_call_functionc(mawk_state_t *MAWK, const char *fname, mawk_cell_t *retc, int argc, const mawk_cell_t *argv) +{ + int n; + SYMTAB *fs; + mawk_cell_t *ret, *tmp; + mawk_cell_t *orig_sp; + FBLOCK *fbp; + FBLOCK fbp_c; + + if (retc != NULL) + mawk_cell_destroy(MAWK, retc); + + fs = mawk_find(MAWK, fname, 0); + if ((fs == NULL) || ((fs->type != ST_FUNCT) && (fs->type != ST_C_FUNCTION))) { + /* does not exist or not a function */ + return -1; + } + + if (fs->type == ST_C_FUNCTION) { + fbp = &fbp_c; + fbp_c.name = fs->name; + fbp_c.code = NULL; + } + else + fbp = fs->stval.fbp; + +#warning TODO: check if we need to grow the stack + + for(n = 0; n< argc; n++) { + inc_mawksp(); + mawk_cellcpy(MAWK, MAWK->sp, &argv[n]); + } + return mawk_call(MAWK, fs->stval.fbp, argc, retc); +} + + +void libmawk_cell_destroy(mawk_state_t *MAWK, mawk_cell_t *c) +{ + mawk_cell_destroy(MAWK, c); + c->type = C_NOINIT; +} + +char *libmawk_print_cell(mawk_state_t *MAWK, const mawk_cell_t *c, char *buff, int size) +{ + char tmp[128]; + int len; + + if ((buff == NULL) || (size < 1)) + return NULL; + *buff = '\0'; + + switch(c->type) { + case C_NUM: +#ifdef MAWK_NO_FLOAT + len = sprintf(tmp, "%d", (int)c->d.dval); +#else + if (c->d.dval == (int)c->d.dval) + len = sprintf(tmp, "%d", (int)c->d.dval); + else + len = 
sprintf(tmp, "%f", c->d.dval); +#endif + if (len > size-1) + len = size-1; + goto copy_tmp; + case C_STRING: + strncpy(buff, ((mawk_string_t *)(c->ptr))->str, size); + buff[size-1] = '\0'; + break; + + case C_NOINIT: + /* should be empty string just as in awk */ + break; + case C_STRNUM: + case C_MBSTRN: +#warning TODO: we should be able to convert the above two + case C_RE: + case C_SPACE: + case C_SNULL: + case C_REPL: + case C_REPLV: + len = sprintf(buff, "Can't convert celltype %d\n", c->type); + break; + default: + len = sprintf(buff, "Invalid celltype %d\n", c->type); + break; + + } + return buff; + + copy_tmp:; + memcpy(buff, tmp, len); + buff[len] = '\0'; + return buff; +} + + +const mawk_cell_t *libmawk_get_var(mawk_state_t *MAWK, const char *vname) +{ + return mawk_get_var(MAWK, vname); +} + +static mawk_array_t array_prep_idx(mawk_state_t *MAWK, const char *arr_name, const char *idx, mawk_cell_t *idxc) +{ + SYMTAB *fs; + fs = mawk_find(MAWK, arr_name, 0); + + /* does symbol exist at all? 
*/ + if (fs == NULL) + return NULL; + + /* exit if not an array */ + if (fs->type != ST_ARRAY) + return NULL; + + idxc->type = C_STRING; + idxc->ptr = (PTR) mawk_new_STRING(MAWK, idx); + + return (mawk_array_t)fs->stval.cp; +} + +int libmawk_get_array_at(mawk_state_t *MAWK, const char *arr_name, const char *idx, mawk_cell_t *result, int create) +{ + mawk_array_t arr; + mawk_cell_t idxc; + int res; + + + if (result != NULL) { + mawk_cell_destroy(MAWK, result); + result->type = C_NOINIT; + } + + arr = array_prep_idx(MAWK, arr_name, idx, &idxc); + if (arr == NULL) + return -1; + + res = mawk_array_find(MAWK, arr, &idxc, result, create); + mawk_cell_destroy(MAWK, &idxc); + return res; +} + +int libmawk_set_array_atv(mawk_state_t *MAWK, const char *arr_name, const char *idx, const char valtype, va_list *ap) +{ + mawk_array_t arr; + mawk_cell_t idxc, valc = libmawk_empty_cell; + + arr = array_prep_idx(MAWK, arr_name, idx, &idxc); + if (arr == NULL) + return -1; + + libmawk_set_cellv(MAWK, &valc, valtype, ap); + + mawk_array_set(MAWK, arr, &idxc, &valc); + mawk_cell_destroy(MAWK, &idxc); + mawk_cell_destroy(MAWK, &valc); + return 0; +} + +int libmawk_set_array_atp(mawk_state_t *MAWK, const char *arr_name, const char *idx, const char valtype, void *val) +{ + mawk_array_t arr; + mawk_cell_t idxc, valc = libmawk_empty_cell; + + arr = array_prep_idx(MAWK, arr_name, idx, &idxc); + if (arr == NULL) + return -1; + + libmawk_set_cellp(MAWK, &valc, valtype, val); + + mawk_array_set(MAWK, arr, &idxc, &valc); + mawk_cell_destroy(MAWK, &idxc); + mawk_cell_destroy(MAWK, &valc); + return 0; +} + +int libmawk_set_array_at(mawk_state_t *MAWK, const char *arr_name, const char *idx, const char valtype, ...) 
+{ + va_list ap; + int ret; + + va_start(ap, valtype); + ret = libmawk_set_array_atv(MAWK, arr_name, idx, valtype, &ap); + va_end(ap); + return ret; +} + +int libmawk_set_scalarv(mawk_state_t *MAWK, const char *var_name, const char valtype, va_list *ap) +{ + mawk_cell_t *c; + c = mawk_get_var(MAWK, var_name); + if (c == NULL) + return -1; + libmawk_set_cellv(MAWK, c, valtype, ap); + return 0; +} + +int libmawk_set_scalarp(mawk_state_t *MAWK, const char *var_name, const char valtype, void *val) +{ + mawk_cell_t *c; + c = mawk_get_var(MAWK, var_name); + if (c == NULL) + return -1; + libmawk_set_cellp(MAWK, c, valtype, val); + return 0; +} + +int libmawk_set_scalar(mawk_state_t *MAWK, const char *var_name, const char valtype, ...) +{ + va_list ap; + int ret; + + va_start(ap, valtype); + ret = libmawk_set_scalarv(MAWK, var_name, valtype, &ap); + va_end(ap); + return ret; +} + + +int libmawk_register_function(mawk_state_t *MAWK, const char *fname, libmawk_c_function *callback) +{ + SYMTAB *sym; + sym = mawk_find(MAWK, fname, 0); + + if (sym != NULL) { + /* special case: we already know symbol is a function but body is empty (typical for c calls) */ + if ((sym->type == ST_FUNCT) && (sym->stval.fbp->code == NULL)) + sym->type = ST_NONE; + + /* if symbol is already defined as something else, return error */ + if (sym->type != ST_NONE) + return 1; + } + else + sym = mawk_find(MAWK, mawk_strdup(MAWK, fname), 1); + + sym->type = ST_C_FUNCTION; + sym->stval.c_function.callback = callback; + sym->stval.c_function.func_userdata = MAWK->func_userdata; + return 0; +} + +mawk_cell_t *libmawk_cfunc_arg(mawk_cell_t *sp, int num_args, int n) +{ + if ((n >= 0) && (n < num_args)) + return sp - (num_args - n - 1); + return NULL; +} + +mawk_cell_t *libmawk_cfunc_ret(mawk_cell_t *sp, int num_args) +{ + return sp - num_args + 1; +} + +SYMTAB *libmawk_register_array(mawk_state_t *MAWK, const char *name, array_imp_t *arr_imp) +{ + SYMTAB *s; + + /* register a variable only if it's not already 
in the hash */ + s = mawk_find(MAWK, name, 0); + if (s != NULL) + return NULL; + + s = mawk_insert(MAWK, mawk_strdup(MAWK, name)); + memset(&(s->stval), 0, sizeof(s->stval)); + s->type = ST_ARRAY; + s->offset = 0; + s->stval.array = mawk_array_new(MAWK, arr_imp); + return s; +} + +SYMTAB *libmawk_register_scalar(mawk_state_t *MAWK, const char *name, mawk_celltype_t type, void *val) +{ + SYMTAB *s; + + switch(type) { + case C_NUM: + case C_STRING: + break; + default: + return NULL; + } + + /* register a variable only if it's not already in the hash */ + s = mawk_find(MAWK, name, 0); + if (s != NULL) + return NULL; + + s = mawk_insert(MAWK, mawk_strdup(MAWK, name)); + memset(&(s->stval), 0, sizeof(s->stval)); + s->type = ST_VAR; + s->offset = 0; + s->stval.cp = MAWK_ZMALLOC(MAWK, mawk_cell_t); + s->stval.cp->type = type; + switch(type) { + case C_NUM: + if (val != NULL) + s->stval.cp->d.dval = *(mawk_num_t *)val; + else + s->stval.cp->d.dval = MAWK_NUM_ZERO; + break; + case C_STRING: + if (val != NULL) + s->stval.cp->ptr = (void *)mawk_new_STRING(MAWK, (char *)val); + else + s->stval.cp->ptr = (void *)mawk_new_STRING(MAWK, ""); + break; + default: + mawk_bozo(MAWK, "libmawk_register_scalar: unsupported type"); + } + return s; +} + +mawk_num_t libmawk_cell2num(mawk_state_t *MAWK, const mawk_cell_t *cp) +{ + mawk_cell_t tmp; + mawk_cellcpy(MAWK, &tmp, cp); + mawk_cast1_to_num(MAWK, &tmp); + /* NOTE: no need to destroy tmp: it's a number for sure, numbers are not allocated */ + return tmp.d.dval; +} + + +int libmawk_cell2int(mawk_state_t *MAWK, const mawk_cell_t *cp) +{ + return (int)libmawk_cell2num(MAWK, cp); +} + +double libmawk_cell2double(mawk_state_t *MAWK, const mawk_cell_t *cp) +{ + return (double)libmawk_cell2num(MAWK, cp); +} + diff --git a/src/libmawk/libmawk.h b/src/libmawk/libmawk.h new file mode 100644 index 0000000..d2f0526 --- /dev/null +++ b/src/libmawk/libmawk.h @@ -0,0 +1,143 @@ +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include + +/* initialize a new maw_state_t, load script, run BEGIN blocks */ +mawk_state_t *libmawk_initialize(int argc, char *argv[]); + +/* Or the same in 3 stages: + - stage 1 creates mawk state + - stage 2 processes CLI args and loads and parses scripts + - stage 3 runs BEGIN blocks + + Creating "builtins" should happen between stage 1 and 2. + Injecting script (from elsewhere than argv[]) should happen between + stage 1 and 2. It is okay to not specify any script (or any argument) + in stage2. +*/ +mawk_state_t *libmawk_initialize_stage1(void); +int libmawk_initialize_stdio(mawk_state_t *m, int stdin_apps, int stdout_apps, int stderr_apps); /* set up stdio; where *_apps is zero use a pipe, where non-zero bind to app's */ +mawk_state_t *libmawk_initialize_stage2(mawk_state_t *m, int argc, char *argv[]); +mawk_state_t *libmawk_initialize_stage3(mawk_state_t *m); + +/* execute exit(0) in the script and free up all memory used by m */ +void libmawk_uninitialize(mawk_state_t *m); + +/* Or the same in 3 stages: + - stage 1 executes exit(0) (executing END { } if not already within END) + - stage 2 free m + Between stage 1 and stage 2 the application may read states changed by + the script in END +*/ +void libmawk_uninitialize_stage1(mawk_state_t *m); +void libmawk_uninitialize_stage2(mawk_state_t *m); + + + +/* run all main blocks until input runs out */ +void libmawk_run_main(mawk_state_t *m); + +/* append \0 terminated string to the input buffer; returns -1 on error */ +int libmawk_append_input(mawk_state_t *m, const char *input_str); + +/* append data to the input buffer; returns -1 on error */ +int libmawk_append_ninput(mawk_state_t *m, const char *input, int len); + +/* close the input buffer (eof to the script) in case it was a pipe + between the app and awk - else don't do anything */ +int libmawk_close_input(mawk_state_t *m); + + +/* set value of a cell */ +mawk_cell_t *libmawk_set_cell(mawk_state_t *MAWK, mawk_cell_t *cell, const char 
argtype, ...); + +/* set value of a cell from a pointer */ +mawk_cell_t *libmawk_set_cellp(mawk_state_t *MAWK, mawk_cell_t *cell, const char argtype, void *argp); + +/* call an awk function; argtype is a string consists of a character for + each argument depending on the argument type: + - d for integer + - f for float + - s for string + If res is not NULL, it is destroyed (regardless of the return value) + and the result cell is copied in res and res needs to be cell_destroyed + by the caller. + Returns 0 on success or -1 on error. +*/ +mawk_exec_result_t libmawk_call_function(mawk_state_t *MAWK, const char *fname, mawk_cell_t *retc, const char *argtypes, ...); + +/* Same as libmawk_call_function, except this one takes an array of pointer to the arguments */ +mawk_exec_result_t libmawk_call_functionp(mawk_state_t *MAWK, const char *fname, mawk_cell_t *retc, const char *argtypes, void **args); + +/* Same as libmawk_call_function, except this one takes an array of pointer to read-only cells */ +mawk_exec_result_t libmawk_call_functionc(mawk_state_t *MAWK, const char *fname, mawk_cell_t *retc, int argc, const mawk_cell_t *argv); + +/* free an allocated cell (use after libmawk_call_function */ +void libmawk_cell_destroy(mawk_state_t *MAWK, mawk_cell_t *c); + + +/* convert a cell to string in buff, return pointer to buff */ +char *libmawk_print_cell(mawk_state_t *MAWK, const mawk_cell_t *c, char *buff, int buffsize); + +/* resolve a variable by name; the caller shouldn't change anything on the cell because of + the possible side effects required on write. The variable is not necessarily + scalar. */ +const mawk_cell_t *libmawk_get_var(mawk_state_t *MAWK, const char *vname); + +/* resolve an element of an array by array name and index and returns 1 + if the element is found. If result is non-NULL, the member mawk_cell_t is copied + there. Changing result will not affect the value in the actual array and + the caller is responsible for destroying the cell. 
+ If arr_name doesn't name an array or upon other error, return value is -1 + On success returns 1 + NOTE: if result is non-NULL, it is destroyed. + If create is non-zero, create non-existing element with empty value + */ +int libmawk_get_array_at(mawk_state_t *MAWK, const char *arr_name, const char *idx, mawk_cell_t *result, int create); + +/* set array at a specific index; valtype/val semantics are the same as for + the libmawk_set_cell*() */ +int libmawk_set_array_atv(mawk_state_t *MAWK, const char *arr_name, const char *idx, const char valtype, va_list *ap); +int libmawk_set_array_atp(mawk_state_t *MAWK, const char *arr_name, const char *idx, const char valtype, void *val); +int libmawk_set_array_at(mawk_state_t *MAWK, const char *arr_name, const char *idx, const char valtype, ...); + +/* same for scalars */ +int libmawk_set_scalarv(mawk_state_t *MAWK, const char *var_name, const char valtype, va_list *ap); +int libmawk_set_scalarp(mawk_state_t *MAWK, const char *var_name, const char valtype, void *val); +int libmawk_set_scalar(mawk_state_t *MAWK, const char *var_name, const char valtype, ...); + +/* register a new function implemented in callback. Returns 0 on success.
*/ +int libmawk_register_function(mawk_state_t *MAWK, const char *fname, libmawk_c_function *callback); + +/* calculate the cell pointer of arugmnet number n on the stack from within + a C function called back from awk */ +mawk_cell_t *libmawk_cfunc_arg(mawk_cell_t *sp, int num_args, int n); + +/* calculate the cell pointer of the return value on the stack from within + a C function called back from awk */ +mawk_cell_t *libmawk_cfunc_ret(mawk_cell_t *sp, int num_args); + +/* *** register new variables _before stage2_ *** */ +/* register a new array; if arr_imp is NULL, the generic (_orig) array + implementation is used, else the virtualized array hooks provided by imp */ +SYMTAB *libmawk_register_array(mawk_state_t *MAWK, const char *name, array_imp_t *arr_imp); +/* register a scalar; type must be C_STRING (val is a char *) or C_NUM (val + is a mawk_num_t *) */ +SYMTAB *libmawk_register_scalar(mawk_state_t *MAWK, const char *name, mawk_celltype_t type, void *val); + + +/* return the numeric value of a cell in mawk's internal number type or + int or double */ +mawk_num_t libmawk_cell2num(mawk_state_t *MAWK, const mawk_cell_t *cp); +int libmawk_cell2int(mawk_state_t *MAWK, const mawk_cell_t *cp); +double libmawk_cell2double(mawk_state_t *MAWK, const mawk_cell_t *cp); + +/* empty mawk_cell_t initializer - cells should start out with this value */ +#define libmawk_empty_cell {0, NULL, {0}} diff --git a/src/libmawk/main.c b/src/libmawk/main.c new file mode 100644 index 0000000..1770dfe --- /dev/null +++ b/src/libmawk/main.c @@ -0,0 +1,65 @@ + +/******************************************** +main.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ +#include "mawk.h" +#include "init.h" +#include "code.h" +#include "files.h" +#include "debug.h" +#include "viohack.h" +#include "vio_orig.h" + +int main(int argc, char **argv) +{ + mawk_state_t *m; + int err = 0; + m = mawk_initialize(argc, argv, mawk_vio_orig_init); + if (m != NULL) { + mawk_vio_orig_setup_stdio(m, 1, 1, 1); + mawk_detect_interactive(m); +#ifndef MAWK_NO_COMP + mawk_parse(m); + if (m->compile_error_count != 0) + err = 1; + m->mawk_state = EXECUTION; +#ifndef MAWK_NO_EXEC + if (m->debug_symbols) + mawk_debug_callstack_push(m, &mawk_debug_begin); +#endif + if ((m->compile_error_count == 0) && (!m->do_exit)) +#endif +#ifndef MAWK_NO_EXEC + mawk_execute(m, m->execution_start, m->eval_stack - 1, 0); +#endif + +#ifndef MAWK_NO_EXEC + if (m->debug_symbols) + mawk_debug_callstack_pop(m); +#endif + + err = m->final_exit_code; + mawk_uninitialize(m); + } + else + err = 1; + + return err; +} + +void mawk_exit_(mawk_state_t *MAWK, int x) +{ + MAWK->do_exit = 1; + MAWK->final_exit_code = MAWK->rt_exit_code != 0 ? MAWK->rt_exit_code : x; +} diff --git a/src/libmawk/makescan.c b/src/libmawk/makescan.c new file mode 100644 index 0000000..5ca9eb2 --- /dev/null +++ b/src/libmawk/makescan.c @@ -0,0 +1,101 @@ + +/******************************************** +makescan.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +/* source for makescan.exe which builds the scancode[] + via: makescan.exe > scancode.c +*/ + +#define MAKESCAN + +#include "scan.h" +#include "mawk.h" + +void mawk_scan_init(mawk_state_t * MAWK) +{ + register char *p; + + memset(MAWK->scan_code, SC_UNEXPECTED, sizeof(MAWK->scan_code)); + for (p = MAWK->scan_code + '0'; p <= MAWK->scan_code + '9'; p++) + *p = SC_DIGIT; + MAWK->scan_code[0] = 0; + MAWK->scan_code[' '] = MAWK->scan_code['\t'] = MAWK->scan_code['\f'] = SC_SPACE; + MAWK->scan_code['\r'] = MAWK->scan_code['\013'] = SC_SPACE; + + MAWK->scan_code[';'] = SC_SEMI_COLON; + MAWK->scan_code['\n'] = SC_NL; + MAWK->scan_code['{'] = SC_LBRACE; + MAWK->scan_code['}'] = SC_RBRACE; + MAWK->scan_code['+'] = SC_PLUS; + MAWK->scan_code['-'] = SC_MINUS; + MAWK->scan_code['*'] = SC_MUL; + MAWK->scan_code['/'] = SC_DIV; + MAWK->scan_code['%'] = SC_MOD; + MAWK->scan_code['^'] = SC_POW; + MAWK->scan_code['('] = SC_LPAREN; + MAWK->scan_code[')'] = SC_RPAREN; + MAWK->scan_code['_'] = SC_IDCHAR; + MAWK->scan_code['='] = SC_EQUAL; + MAWK->scan_code['#'] = SC_COMMENT; + MAWK->scan_code['\"'] = SC_DQUOTE; + MAWK->scan_code[','] = SC_COMMA; + MAWK->scan_code['!'] = SC_NOT; + MAWK->scan_code['<'] = SC_LT; + MAWK->scan_code['>'] = SC_GT; + MAWK->scan_code['|'] = SC_OR; + MAWK->scan_code['&'] = SC_AND; + MAWK->scan_code['?'] = SC_QMARK; + MAWK->scan_code[':'] = SC_COLON; + MAWK->scan_code['['] = SC_LBOX; + MAWK->scan_code[']'] = SC_RBOX; + MAWK->scan_code['\\'] = SC_ESCAPE; + MAWK->scan_code['.'] = SC_DOT; + MAWK->scan_code['~'] = SC_MATCH; + MAWK->scan_code['$'] = SC_DOLLAR; + + for (p = MAWK->scan_code + 'A'; p <= MAWK->scan_code + 'Z'; p++) + *p = *(p + 'a' - 'A') = SC_IDCHAR; + +} + +void scan_print(mawk_state_t * MAWK) +{ + register char *p = MAWK->scan_code; + register int c; /* column */ + register int r; /* row */ + + printf("\n\n/* scancode.c */\n\n\n"); + printf("const char mawk_scan_code[256] = {\n"); + + for (r = 1; r <= 
16; r++) { + for (c = 1; c <= 16; c++) { + printf("%2d", *p++); + if (r != 16 || c != 16) + putchar(','); + } + putchar('\n'); + } + + printf("} ;\n"); +} + + +int main(int argc, char **argv) +{ + mawk_state_t m, *MAWK = &m; + mawk_scan_init(MAWK); + scan_print(MAWK); + return 0; +} diff --git a/src/libmawk/man/gendoc.sh b/src/libmawk/man/gendoc.sh new file mode 100755 index 0000000..3b8974f --- /dev/null +++ b/src/libmawk/man/gendoc.sh @@ -0,0 +1,23 @@ +#!/bin/sh +docdir=../../../doc +PAGES="example.7libmawk +libmawk_append_input.3libmawk +libmawk_call_function.3libmawk +libmawk_cell_destroy.3libmawk +libmawk_get_var.3libmawk +libmawk_initialize.3libmawk +libmawk_initialize_stage.3libmawk +libmawk_register_function.3libmawk +libmawk_run_main.3libmawk +libmawk_set_cell.3libmawk +libmawk_uninitialize.3libmawk +lmawk.1" + +cat input/example1 ../../testapp/main.c input/example2 > example.7libmawk + +for n in $PAGES +do + groff -c -Tlatin1 -mandoc $n > $docdir/$n.txt + groff -c -Thtml -mandoc $n > $docdir/$n.html +done + diff --git a/src/libmawk/man/input/example1 b/src/libmawk/man/input/example1 new file mode 100644 index 0000000..64d933e --- /dev/null +++ b/src/libmawk/man/input/example1 @@ -0,0 +1,48 @@ +.\" Copyright 2009 Tibor Palinkas (mawk@inno.bme.hu) +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of this +.\" manual under the conditions for verbatim copying, provided that the +.\" entire resulting derived work is distributed under the terms of a +.\" permission notice identical to this one. +.\" +.\" Formatted or processed versions of this manual, if unaccompanied by +.\" the source, must acknowledge the copyright and authors of this work. 
+.\" +.TH EXAMPLE 7 2009-08-10 "libmawk" "libmawk manual" +.SH NAME +libmawk example \- how to use the library +.SH SYNOPSIS +.nf +.B #include +.sp +.SH DESCRIPTION +Libmawk is a library that lets applications embed awk scripts using the +code of the popular implementation +.B mawk. +The normal process is to call libmawk_initialize() to set up a new mawk +context (with script(s) loaded), then in the main loop feed it using +libmawk_append_input(). For "out of band" communication, the program +may also call functions implemented in awk and read (or modify) global +variables of the awk script. The host application usually will also bind +some of its functions to the context using libmawk_register_function, which +allows the awk script to call the host application's functions directly as +if they were awk builtins or user defined functions. After the main loop, the +application destroys the context freeing up all memory allocated for +the script(s). +.sp +One context is for one awk program. One awk program may consist of multiple +script files (just as with command line awk, with multiple -f filename +arguments). Libmawk is instance safe, the host application may create +multiple instances of contexts with the same or with different set of +awk scripts loaded. These contexts are totally separate, no variables, +functions or any sort of states are shared. However, the host application +may provide means of communication between those scripts by custom functions +or by copying variable contents between them. +.SH Example application +The following example application creates a single context to demonstrate +all the above mentioned functionality.
+.nf diff --git a/src/libmawk/man/input/example2 b/src/libmawk/man/input/example2 new file mode 100644 index 0000000..83928dd --- /dev/null +++ b/src/libmawk/man/input/example2 @@ -0,0 +1 @@ +.fi diff --git a/src/libmawk/man/libmawk_append_input.3libmawk b/src/libmawk/man/libmawk_append_input.3libmawk new file mode 100644 index 0000000..5cf3762 --- /dev/null +++ b/src/libmawk/man/libmawk_append_input.3libmawk @@ -0,0 +1,52 @@ +.\" Copyright 2009 Tibor Palinkas (mawk@inno.bme.hu) +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of this +.\" manual under the conditions for verbatim copying, provided that the +.\" entire resulting derived work is distributed under the terms of a +.\" permission notice identical to this one. +.\" +.\" Formatted or processed versions of this manual, if unaccompanied by +.\" the source, must acknowledge the copyright and authors of this work. +.\" +.TH LIBMAWK_APPEND_INPUT 3 2009-08-10 "libmawk" "libmawk manual" +.SH NAME +libmawk_append_input \- append a string to an input buffer +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "int libmawk_append_input(mawk_state_t *" m ", const char *" input_str ); +.fi +.sp +.BI "int libmawk_append_ninput(mawk_state_t *" m ", const char *" input ", int" len ); +.fi +.sp +.SH DESCRIPTION +The +.BR libmawk_append_input () +and +.BR libmawk_append_ninput () +functions allow the application to fill the input buffer of a libmawk context. +No record separator is appended, only the bytes denoted by input_str or input, +thus it is possible to append partial records. Appending to +the input doesn't have the side effect of any script being run. There may be +multiple libmawk_append_input() calls before a call to libmawk_run_main(). The +latter call is used to let the script process the input buffer.
+.sp +The only difference between the two calls is the input format: +.BR libmawk_append_input () +expects a nul-terminated string, whereas +.BR libmawk_append_ninput () +takes arbitrary binary data and its length. +.sp +Argument m is a libmawk context previously returned by libmawk_initialize() +or libmawk_initialize_stage3(). +.SH "SEE ALSO" +.BR libmawk_initialize_stage (3libmawk), +.BR libmawk_initialize (3libmawk), +.BR libmawk_run_main (3libmawk). + diff --git a/src/libmawk/man/libmawk_call_function.3libmawk b/src/libmawk/man/libmawk_call_function.3libmawk new file mode 100644 index 0000000..e4faf65 --- /dev/null +++ b/src/libmawk/man/libmawk_call_function.3libmawk @@ -0,0 +1,56 @@ +.\" Copyright 2009 Tibor Palinkas (mawk@inno.bme.hu) +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of this +.\" manual under the conditions for verbatim copying, provided that the +.\" entire resulting derived work is distributed under the terms of a +.\" permission notice identical to this one. +.\" +.\" Formatted or processed versions of this manual, if unaccompanied by +.\" the source, must acknowledge the copyright and authors of this work.
+.\" +.TH LIBMAWK_CALL_FUNCTION 3 2009-08-10 "libmawk" "libmawk manual" +.SH NAME +libmawk_call_function \- call a user defined (script) function +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "int libmawk_call_function(mawk_state_t *" MAWK ", const char *" fname ", CELL *" res ", const char *" argtypes ", ...);" +.fi +.BI "int libmawk_call_functionp(mawk_state_t *" MAWK ", const char *" fname ", CELL *" res ", const char *" argtypes ", void **args);" +.fi +.sp +.SH DESCRIPTION +The +.BR libmawk_call_function () +function looks up a user defined awk function called +.I fname +, fills the stack with arguments converted from the varargs and calls +the function. +The +.BR libmawk_call_functionp () +performs the same action but avoids using vararg by requiring an array of +generic pointers to the function arguments. +.sp +Argtypes is a zero terminated string for both functions, each character corresponding +to an argument. Type characters are described in libmawk_set_cell() manual page. +.sp +If res is non-NULL, it is cell_destroyed (regardless of errors) and the +return value of the user function is copied into it. The caller shall +run libmawk_cell_destroy on it. +.sp +Argument MAWK is a libmawk context previously returned by libmawk_initialize() +or libmawk_initialize_stage3(). + +.SH "RETURN VALUE" +0 on success or -1 on error. If res is non-NULL, the result cell copied into it +must be destroyed by the application using libmawk_cell_destroy.
+.SH "SEE ALSO" +.BR libmawk_initialize_stage (3libmawk), +.BR libmawk_initialize (3libmawk), +.BR libmawk_cell_destroy (3libmawk), +.BR libmawk_set_cell (3libmawk), diff --git a/src/libmawk/man/libmawk_cell_destroy.3libmawk b/src/libmawk/man/libmawk_cell_destroy.3libmawk new file mode 100644 index 0000000..4135a0a --- /dev/null +++ b/src/libmawk/man/libmawk_cell_destroy.3libmawk @@ -0,0 +1,39 @@ +.\" Copyright 2009 Tibor Palinkas (mawk@inno.bme.hu) +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of this +.\" manual under the conditions for verbatim copying, provided that the +.\" entire resulting derived work is distributed under the terms of a +.\" permission notice identical to this one. +.\" +.\" Formatted or processed versions of this manual, if unaccompanied by +.\" the source, must acknowledge the copyright and authors of this work. +.\" +.TH LIBMAWK_CELL_DESTROY 3 2009-08-12 "libmawk" "libmawk manual" +.SH NAME +libmawk_cell_destroy \- free all memory associated with a cell +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "void libmawk_cell_destroy(mawk_state_t *" m ", CELL *" c ");" +.fi +.sp +.SH DESCRIPTION +The +.BR libmawk_cell_destroy () +function frees all memory allocated to store a mawk cell. It is useful +with some of the libmawk calls that return a newly allocated cell, such as +the libmawk_call_function() call. +.sp +Argument m is a libmawk context previously returned by libmawk_initialize() +or libmawk_initialize_stage3(). + +.SH "SEE ALSO" +.BR libmawk_initialize_stage (3libmawk), +.BR libmawk_initialize (3libmawk), +.BR libmawk_call_function (3libmawk). 
+ diff --git a/src/libmawk/man/libmawk_get_var.3libmawk b/src/libmawk/man/libmawk_get_var.3libmawk new file mode 100644 index 0000000..f8ed301 --- /dev/null +++ b/src/libmawk/man/libmawk_get_var.3libmawk @@ -0,0 +1,64 @@ +.\" Copyright 2009..2014 Tibor Palinkas (mawk@inno.bme.hu) +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of this +.\" manual under the conditions for verbatim copying, provided that the +.\" entire resulting derived work is distributed under the terms of a +.\" permission notice identical to this one. +.\" +.\" Formatted or processed versions of this manual, if unaccompanied by +.\" the source, must acknowledge the copyright and authors of this work. +.\" +.TH LIBMAWK_GET_VAR 3 2009-08-12 "libmawk" "libmawk manual" +.SH NAME +libmawk_get_var \- returns a pointer to a mawk variable +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "const CELL *libmawk_get_var(mawk_state_t *" m ", const char *" vname ");" +.BI "int libmawk_get_array_at(mawk_state_t *" m ", const char *" arr_name "," +.BI " const char *" idx ", CELL *" res ", int " alloc ");" +.fi +.sp +.SH DESCRIPTION +The +.BR libmawk_get_var () +function returns a pointer to a mawk cell that represents the global variable +with name passed in +.I vname +in the given context. The returned CELL should never be free'd or destroyed +or modified. +Function libmawk_print_cell may be used for converting the cell to string. +.sp +Function +.BR libmawk_get_array_at () +performs the same operation for an element of an array. -1 is returned if +.I arr_name +is not an array or upon an error. If +.I idx +is not an existing index in the array it is allocated if +.I alloc +is non-zero.
If +.I res +is not NULL, it is destroyed (regardless of the return value) and if +the index exists (or is created by the call), is loaded with the value. +The caller needs to destroy +.I res +after use. Since +.I res +is destroyed when non-NULL, it must be a valid cell with valid type. +.sp +Argument m is a libmawk context previously returned by libmawk_initialize() +or libmawk_initialize_stage3(). + +.SH "SEE ALSO" +.BR libmawk_initialize_stage (3libmawk), +.BR libmawk_initialize (3libmawk), +.BR libmawk_call_function (3libmawk), +.BR libmawk_print_cell (3libmawk). + + diff --git a/src/libmawk/man/libmawk_initialize.3libmawk b/src/libmawk/man/libmawk_initialize.3libmawk new file mode 100644 index 0000000..7394acc --- /dev/null +++ b/src/libmawk/man/libmawk_initialize.3libmawk @@ -0,0 +1,37 @@ +.\" Copyright 2009 Tibor Palinkas (mawk@inno.bme.hu) +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of this +.\" manual under the conditions for verbatim copying, provided that the +.\" entire resulting derived work is distributed under the terms of a +.\" permission notice identical to this one. +.\" +.\" Formatted or processed versions of this manual, if unaccompanied by +.\" the source, must acknowledge the copyright and authors of this work. +.\" +.TH LIBMAWK_INITIALIZE 3 2009-08-10 "libmawk" "libmawk manual" +.SH NAME +libmawk_initialize \- create a new libmawk context +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "mawk_state_t *libmawk_initialize(int " s ", char *" argv[] ); +.fi +.sp +.SH DESCRIPTION +The +.BR libmawk_initialize () +function returns a pointer to a newly created libmawk context. Any amount +of libmawk contexts can live in parallel in an application. Arguments are the +same as for a command line mawk session. 
Scripts are loaded (either from command +line or from files using -f), variables are set (with -v), special options +are set (with -W), etc. +.SH "RETURN VALUE" +A pointer to a new libmawk context or NULL on error. +.SH "SEE ALSO" +.BR libmawk_initialize_stage (3libmawk), +.BR libmawk_uninitialize (3libmawk), diff --git a/src/libmawk/man/libmawk_initialize_stage.3libmawk b/src/libmawk/man/libmawk_initialize_stage.3libmawk new file mode 100644 index 0000000..70f1497 --- /dev/null +++ b/src/libmawk/man/libmawk_initialize_stage.3libmawk @@ -0,0 +1,73 @@ +.\" Copyright 2009 Tibor Palinkas (mawk@inno.bme.hu) +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of this +.\" manual under the conditions for verbatim copying, provided that the +.\" entire resulting derived work is distributed under the terms of a +.\" permission notice identical to this one. +.\" +.\" Formatted or processed versions of this manual, if unaccompanied by +.\" the source, must acknowledge the copyright and authors of this work. +.\" +.TH LIBMAWK_INITIALIZE_STAGE 3 2009-08-10 "libmawk" "libmawk manual" +.SH NAME +libmawk_initialize_stage* \- create a new libmawk context in 3 stages +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "mawk_state_t *libmawk_initialize_stage1(void); +.sp +.BI "mawk_state_t *libmawk_initialize_stage2(mawk_state_t * " m, int " s ", char *" argv[] ); +.sp +.BI "mawk_state_t *libmawk_initialize_stage3(mawk_state_t * " m ); +.fi +.sp +.SH DESCRIPTION +The +.BR libmawk_initialize_stage* () +functions together do the same as libmawk_initialize() but allows +the application to take actions between different stages. +.BR libmawk_initialize_stage1 () +returns a pointer to a newly created libmawk context. Any amount +of libmawk contexts can live in parallel in an application. 
+.sp +.BR libmawk_initialize_stage2 () +can be called after a succesful stage1 call. + Stage2 is responsible for processing the command line arguments and +loading any script. + +Arguments are the +same as for a command line mawk session. Scripts are loaded (either from command +line or from files using -f), variables are set (with -v), special options +are set (with -W), etc. Unlike with libmawk_initialize(), the application may +decide not to provide any script at this stage. All command line arguments +are processed. + +The most common case is that +the application calls stage1 with no script, then already having a context +makes some manipulations on it (for example registers some C functions that +would be already called in the BEGIN part of the script that will be later +loaded). Optionally before calling stage2 the application loads the actual +script(s) using mawk_append_input_file(). +.sp +.BR libmawk_initialize_stage3 () +is called as a final step of the three-stage initialization process. Stage3 +is responsible for running all the BEGIN parts of all scripts loaded at +stage1 or stage2. It is useful to have stage3 in a separate call to allow +applications to manipulate the context right before initializing the scripts. +.sp +Stage2 gets the pointer returned by stage1 and stage3 gets the pointer +returned by stage2. Subsequent calls to libmawk functions should get +the pointer returned by stage3. + +.SH "RETURN VALUE" +At stage 1 a pointer to a new libmawk context or NULL on error. Subsequent +stages will return the same pointer or NULL on error. +.SH "SEE ALSO" +.BR libmawk_initialize_stage (3libmawk), +.BR libmawk_uninitialize (3libmawk), +.BR mawk_append_input_file(3libmawk). 
diff --git a/src/libmawk/man/libmawk_register_function.3libmawk b/src/libmawk/man/libmawk_register_function.3libmawk new file mode 100644 index 0000000..296f0c2 --- /dev/null +++ b/src/libmawk/man/libmawk_register_function.3libmawk @@ -0,0 +1,70 @@ +.\" Copyright 2009 Tibor Palinkas (mawk@inno.bme.hu) +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of this +.\" manual under the conditions for verbatim copying, provided that the +.\" entire resulting derived work is distributed under the terms of a +.\" permission notice identical to this one. +.\" +.\" Formatted or processed versions of this manual, if unaccompanied by +.\" the source, must acknowledge the copyright and authors of this work. +.\" +.TH LIBMAWK_REGISTER_FUNCTION 3 2009-08-12 "libmawk" "libmawk manual" +.SH NAME +libmawk_register_function \- registers a C function with a callback +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "typedef CELL *libmawk_c_function(mawk_state_t *" m ", CELL *" sp ", int " a_args ");" +.fi +.BI "int libmawk_register_function(mawk_state_t *" MAWK ", const char *" fname ", libmawk_c_function *" callback ");" +.fi +.BI "CELL *libmawk_stackret(CELL *" original_sp ");" +.fi +.sp +.SH DESCRIPTION +The +.BR libmawk_register_function () +call registers an user defined function donated by the host application in +a mawk context so that it acts exactly like user defined functions in +written in awk. The name of the new function is given in +.I fname +and should not match any of the user defined function names in the awk +script. +.sp +When the user function is called back, argument +.I sp +is the stack pointer and +.I a_args +holds the number of arguments. The user function is responsible for managing +the stack: it should pop all arguments before returning. 
+.sp +The user function should also generate a return value, which is done by +calling libmawk_set_cell() on the stack slot returned by libmawk_stackret. +Libmawk_stackret should be called with the modified +.I sp +after popping all arguments. +.sp +Argument m is a libmawk context previously returned by libmawk_initialize() +or libmawk_initialize_stage3(). +.sp +For more information about user function callbacks, especially on stack handling, +see manual page example(3libmawk). +.SH "RETURN VALUE" +The user function should return the stack pointer after popping all arguments. +.sp +The libmawk_register_function call returns 0 on success. +.sp +Call libmawk_stackret returns a stack pointer to the slot where the user function should store its return value. + +.SH "SEE ALSO" +.BR libmawk_initialize_stage (3libmawk), +.BR libmawk_initialize (3libmawk), +.BR libmawk_set_cell (3libmawk), +.BR libmawk_print_cell (3libmawk). + + diff --git a/src/libmawk/man/libmawk_run_main.3libmawk b/src/libmawk/man/libmawk_run_main.3libmawk new file mode 100644 index 0000000..674316d --- /dev/null +++ b/src/libmawk/man/libmawk_run_main.3libmawk @@ -0,0 +1,41 @@ +.\" Copyright 2009 Tibor Palinkas (mawk@inno.bme.hu) +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of this +.\" manual under the conditions for verbatim copying, provided that the +.\" entire resulting derived work is distributed under the terms of a +.\" permission notice identical to this one. +.\" +.\" Formatted or processed versions of this manual, if unaccompanied by +.\" the source, must acknowledge the copyright and authors of this work. 
+.\" +.TH LIBMAWK_RUN_MAIN 3 2009-08-10 "libmawk" "libmawk manual" +.SH NAME +libmawk_run_main \- run main parts of a script +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "void libmawk_run_main(mawk_state_t *" m ); +.fi +.sp +.SH DESCRIPTION +The +.BR libmawk_run_main () +attempts to take and parse the next input record and runs all main +parts of the script that matches. If there are multiple full records +in the input buffer, the process repeats until the buffer becomes empty +or contains a partial record. If there is no full record in the buffer, +the call returns with nothing done. The call itself never blocks, but the +script may. The input buffer may be filled using the libmawk_append_input() +call. +.sp +Argument m is a libmawk context previously returned by libmawk_initialize() +or libmawk_initialize_stage3(). +.SH "SEE ALSO" +.BR libmawk_initialize_stage (3libmawk), +.BR libmawk_initialize (3libmawk), +.BR libmawk_append_input (3libmawk), diff --git a/src/libmawk/man/libmawk_set_cell.3libmawk b/src/libmawk/man/libmawk_set_cell.3libmawk new file mode 100644 index 0000000..775f293 --- /dev/null +++ b/src/libmawk/man/libmawk_set_cell.3libmawk @@ -0,0 +1,55 @@ +.\" Copyright 2009 Tibor Palinkas (mawk@inno.bme.hu) +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of this +.\" manual under the conditions for verbatim copying, provided that the +.\" entire resulting derived work is distributed under the terms of a +.\" permission notice identical to this one. +.\" +.\" Formatted or processed versions of this manual, if unaccompanied by +.\" the source, must acknowledge the copyright and authors of this work. +.\" +.TH LIBMAWK_SET_CELL 3 2009-08-10 "libmawk" "libmawk manual" +.SH NAME +libmawk_set_cell \- set the value of a mawk cell. 
+.SH SYNOPSIS +.nf +.B #include +.sp +.BI "CELL *libmawk_set_cell(mawk_state_t *" m ", CELL *" cell ", const char" argtype "," ... ); +.fi +.BI "CELL *libmawk_set_cellp(mawk_state_t *" m ", CELL *" cell ", const char" argtype ", void *" argp ); +.sp +.SH DESCRIPTION +The +.BR libmawk_set_cell () +function modifies the value of a mawk cell (variable). Argumetn argtype is a +format character that describes the type of the payload (accessed trough vararg). +.sp +The +.BR libmawk_set_cellp () +function performs the same action but accepts a generic pointer to the payload. +.sp +.B "Format character" +is one of the followings: +.in +4n +.TP + 'd' for int payload +.TP + 'f' for double payload +.TP + 's' for (zero terminated) char * payload. +.in +.sp +Argument m is a libmawk context previously returned by libmawk_initialize() +or libmawk_initialize_stage3(). + +.SH "RETURN VALUE" +A pointer to the cell modified. +.SH "SEE ALSO" +.BR libmawk_initialize_stage (3libmawk), +.BR libmawk_initialize (3libmawk), +.BR libmawk_get_var (3libmawk). diff --git a/src/libmawk/man/libmawk_uninitialize.3libmawk b/src/libmawk/man/libmawk_uninitialize.3libmawk new file mode 100644 index 0000000..520cc8a --- /dev/null +++ b/src/libmawk/man/libmawk_uninitialize.3libmawk @@ -0,0 +1,33 @@ +.\" Copyright 2009 Tibor Palinkas (mawk@inno.bme.hu) +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of this +.\" manual under the conditions for verbatim copying, provided that the +.\" entire resulting derived work is distributed under the terms of a +.\" permission notice identical to this one. +.\" +.\" Formatted or processed versions of this manual, if unaccompanied by +.\" the source, must acknowledge the copyright and authors of this work. 
+.\" +.TH LIBMAWK_UNINITIALIZE 3 2009-08-10 "libmawk" "libmawk manual" +.SH NAME +libmawk_uninitialize \- destroy a libmawk context +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "void libmawk_uninitialize(mawk_state_t * " m ); +.fi +.sp +.SH DESCRIPTION +The +.BR libmawk_uninitialize () +function destroys a context previously created using libmawk_initialize() +or libmawk_initialize_stage1() call. It unloads scripts and frees all memory +of the context. +.SH "SEE ALSO" +.BR libmawk_initialize_stage (3libmawk), +.BR libmawk_initialize (3libmawk), diff --git a/src/libmawk/man/lmawk.1 b/src/libmawk/man/lmawk.1 new file mode 100644 index 0000000..0e81697 --- /dev/null +++ b/src/libmawk/man/lmawk.1 @@ -0,0 +1,1728 @@ +.TH LMAWK 1 "Dec 12 2010" "Version 1.2" "USER COMMANDS" +.\" strings +.ds ex \fIexpr\fR +.SH NAME +lmawk \- pattern scanning and text processing language +.SH SYNOPSIS +.B lmawk +[\-\fBW +.IR option ] +[\-\fBF +.IR value ] +[\-\fBv +.IR var=value ] +[\-\|\-] 'program text' [file ...] +.br +.B lmawk +[\-\fBW +.IR option ] +[\-\fBF +.IR value ] +[\-\fBv +.IR var=value ] +[\-\fBf +.IR program-file ] +[\-\|\-] [file ...] +.SH DESCRIPTION +.B lmawk +is an interpreter for the AWK Programming Language derived from mawk. +The AWK language +is useful for manipulation of data files, +text retrieval and processing, +and for prototyping and experimenting with algorithms. +.B lmawk +is a \fInew awk\fR meaning it implements the AWK language as +defined in Aho, Kernighan and Weinberger, +.I "The AWK Programming Language," +Addison-Wesley Publishing, 1988. (Hereafter referred to as +the AWK book.) +.B mawk +conforms to the Posix 1003.2 +(draft 11.3) +definition of the AWK language +which contains a few features not described in the AWK +book, and +.B mawk +provides a small number of extensions. +.PP +An AWK program is a sequence of \fIpattern {action}\fR pairs and +function definitions. 
+Short programs are entered on the command line +usually enclosed in ' ' to avoid shell +interpretation. +Longer programs can be read in from a +file with the \-f option. +Data input is read from the list of files on +the command line or from standard input when the list is empty. +The input is broken into records as determined by the +record separator variable, \fBRS\fR. Initially, +.B RS += "\en" and records are synonymous with lines. +Each record is compared against each +.I pattern +and if it matches, the program text for +.I "{action}" +is executed. +.SH OPTIONS +.TP \w'\-\fBW'u+\w'\fRsprintf=\fInum\fR'u+2n +\-\fBF \fIvalue\fP +sets the field separator, \fBFS\fR, to +.IR value . +.TP +\-\fBf \fIfile +Program text is read from \fIfile\fR instead of from the +command line. Multiple +.B \-f +options are allowed. As a libmawk extension, if file name starts with +plus ('+'), it is not loaded if the same file has been loaded already +by a previous -f or include from any of the scripts already loaded. +.TP +\-\fBb \fIfile +Program bytecode is read from \fIfile\fR . Multiple +.B \-b +options are allowed. Bytecode can be generated using -Wcompile. Libmawk +may refuse to load bytecode generated on a different system if byte order, +type sizes or dump version differs. +.TP +\-\fBv \fIvar=value\fR +assigns +.I value +to program variable +.IR var . +.TP +\-\|\- +indicates the unambiguous end of options. +.PP +The above options will be available with any Posix compatible +implementation of AWK, and implementation specific options are +prefaced with +.BR \-W . +.B lmawk +provides six: +.TP \w'\-\fBW'u+\w'\fRsprintf=\fInum\fR'u+2n +\-\fBW \fRversion +.B lmawk +writes its version and copyright +to stdout and compiled limits to +stderr and exits 0. +.TP +\-\fBW \fRdebug +include location info in the compiled code; location information is visible +in the dump and when debugging libmawk. 
+.TP +\-\fBW \fRdump +writes an assembler like listing of the internal +representation of the program to stdout and exits 0 +(on successful compilation). +.TP +\-\fBW \fRdumpsym +writes a list of global symbols to stdout and exits 0 +(on successful compilation). +.TP +\-\fBW \fRcompile +writes a binary dump of the bytecode to stdout. This bytecode can be +loaded using the +\-\fBb \fRswitch. +.TP +\-\fBW \fRinteractive +sets unbuffered writes to stdout and line buffered reads from stdin. +Records from stdin are lines regardless of the value of +.BR RS . +.TP +\-\fBW \fRmaxmem=\fInum\fR +limit dynamic memory allocation during compilation and execution to +.I num +bytes and exit with out-of-the-memory error if more memory is to be allocated. +Optional suffixes are k for kilobyte and m for megabyte. 0 means unlimited, +which is also the default. +.TP +\-\fBW \fRexec \fIfile +Program text is read from +.I file +and this is the last option. Useful on systems that support the +.B #! +"magic number" convention for executable scripts. +.TP +\-\fBW \fRsprintf=\fInum\fR +adjusts the size of +.B lmawk's +internal sprintf buffer to +.I num +bytes. More than rare use of this option indicates +.B lmawk +should be recompiled. +.TP +\-\fBW \fRposix_space +forces +.B lmawk +not to consider '\en' to be space. +.PP +The short forms +.BR \-W [vdiesp] +are recognized and on some systems \fB\-W\fRe is mandatory to avoid +command line length limitations. +.SH "THE AWK LANGUAGE" +.SS "\fB1. Program structure" +An AWK program is a sequence of +.I "pattern {action}" +pairs and user +function definitions. +.PP +A pattern can be: +.nf +.RS +\fBBEGIN +END\fR +expression +expression , expression +.sp +.RE +.fi +One, but not both, +of \fIpattern {action}\fR can be omitted. If +.I {action} +is omitted it is implicitly { print }. If +.I pattern +is omitted, then it is implicitly matched. +.B BEGIN +and +.B END +patterns require an action. 
+.PP +Statements are terminated by newlines, semi-colons or both. +Groups of statements such as +actions or loop bodies are blocked via { ... } as in C. The +last statement in a block doesn't need a terminator. Blank lines +have no meaning; an empty statement is terminated with a +semi-colon. Long statements +can be continued with a backslash, \e\|. A statement can be broken +without a backslash after a comma, left brace, &&, ||, +.BR do , +.BR else , +the right parenthesis of an +.BR if , +.B while +or +.B for +statement, and the +right parenthesis of a function definition. +A comment starts with # and extends to, but does not include +the end of line. +.PP +The following statements control program flow inside blocks. +.RS +.PP +.B if +( \*(ex ) +.I statement +.PP +.B if +( \*(ex ) +.I statement +.B else +.I statement +.PP +.B while +( \*(ex ) +.I statement +.PP +.B do +.I statement +.B while +( \*(ex ) +.PP +.B for +( +\fIopt_expr\fR ; +\fIopt_expr\fR ; +\fIopt_expr\fR +) +.I statement +.PP +.B for +( \fIvar \fBin \fIarray\fR ) +.I statement +.PP +.B continue +.PP +.B break +.RE +.\" +.SS "\fB2. Data types, conversion and comparison" +There are two basic data types, numeric and string. +Numeric constants can be integer like \-2, +decimal like 1.08, or in scientific notation like +\-1.1e4 or .28E\-3. All numbers are represented internally and all +computations are done in floating point arithmetic. +So for example, the expression +0.2e2 == 20 +is true and true is represented as 1.0. +.PP +String constants are enclosed in double quotes. +.sp +.ce +"This is a string with a newline at the end.\en" +.sp +Strings can be continued across a line by escaping (\e) the newline. +The following escape sequences are recognized. 
+.nf +.sp + \e\e \e + \e" " + \ea alert, ascii 7 + \eb backspace, ascii 8 + \et tab, ascii 9 + \en newline, ascii 10 + \ev vertical tab, ascii 11 + \ef formfeed, ascii 12 + \er carriage return, ascii 13 + \eddd 1, 2 or 3 octal digits for ascii ddd + \exhh 1 or 2 hex digits for ascii hh +.sp +.fi +If you escape any other character \ec, you get \ec, i.e., +.B lmawk +ignores the escape. +.PP +There are really three basic data types; the third is +.I "number and string" +which has both a numeric value and a string value +at the same time. +User defined variables come into existence when first referenced +and are initialized to +.IR null , +a number and string value which has numeric value 0 and string value +"". +Non-trivial number and string typed data come from input +and are typically stored in fields. (See section 4). +.PP +The type of an expression is determined by its context and automatic +type conversion occurs if needed. For example, to evaluate the +statements +.nf +.sp + y = x + 2 ; z = x "hello" +.sp +.fi +The value stored in variable y will be typed numeric. +If x is not numeric, +the value read from x is converted to numeric before it is added to +2 and stored in y. The value stored in variable z will be typed +string, and the value of x will be converted to string if necessary +and concatenated with "hello". (Of course, the value and type +stored in x is not changed by any conversions.) +A string expression is converted to numeric using its longest +numeric prefix as with +.IR atof (3). +A numeric expression is converted to string by replacing +.I expr +with +.BR sprintf(CONVFMT , +.IR expr ), +unless +.I expr +can be represented on the host machine as an exact integer then +it is converted to \fBsprintf\fR("%d", \*(ex). +.B Sprintf() +is an AWK built-in that duplicates the functionality of +.IR sprintf (3), +and +.B CONVFMT +is a built-in variable used for internal conversion +from number to string and initialized to "%.6g". 
+Explicit type conversions can be forced, +\*(ex "" +is string and +.IR expr +0 +is numeric. +.PP +To evaluate, +\*(ex\d1\u \fBrel-op \*(ex\d2\u, +if both operands are numeric or number and string then the comparison +is numeric; if both operands are string the comparison is string; +if one operand is string, the non-string operand is converted and +the comparison is string. The result is numeric, 1 or 0. +.PP +In boolean contexts such as, +\fBif\fR ( \*(ex ) \fIstatement\fR, +a string expression evaluates true if and only if it is not the +empty string ""; +numeric values if and only if not numerically zero. +.\" +.SS "\fB3. Regular expressions" +In the AWK language, records, fields and strings are often +tested for matching a +.IR "regular expression" . +Regular expressions are enclosed in slashes, and +.nf +.sp + \*(ex ~ /\fIr\fR/ +.sp +.fi +is an AWK expression that evaluates to 1 if \*(ex "matches" +.IR r , +which means a substring of \*(ex is in the set of strings +defined by +.IR r . +With no match the expression evaluates to 0; replacing +~ with the "not match" operator, !~ , reverses the meaning. +As pattern-action pairs, +.nf +.sp + /\fIr\fR/ { \fIaction\fR } and\ + \fB$0\fR ~ /\fIr\fR/ { \fIaction\fR } +.sp +.fi +are the same, +and for each input record that matches +.IR r , +.I action +is executed. +In fact, /\fIr\fR/ is an AWK expression that is +equivalent to (\fB$0\fR ~ /\fIr\fR/) anywhere except when on the +right side of a match operator or passed as an argument to +a built-in function that expects a regular expression +argument. +.PP +AWK uses extended regular expressions as with +.IR egrep (1). +The regular expression metacharacters, i.e., those with special +meaning in regular expressions are +.nf +.sp + \ ^ $ . [ ] | ( ) * + ? +.sp +.fi +Regular expressions are built up from characters as follows: +.RS +.TP \w'[^c\d1\uc\d2\uc\d3\u...]'u+1n +\fIc\fR +matches any non-metacharacter +.IR c . 
+.TP +\e\fIc\fR +matches a character defined by the same escape sequences used +in string constants or the literal +character +.I c +if +\e\fIc\fR +is not an escape sequence. +.TP +\&\. +matches any character (including newline). +.TP +^ +matches the front of a string. +.TP +$ +matches the back of a string. +.TP +[c\d1\uc\d2\uc\d3\u...] +matches any character in the class +c\d1\uc\d2\uc\d3\u... . An interval of characters is denoted +c\d1\u\-c\d2\u inside a class [...]. +.TP +[^c\d1\uc\d2\uc\d3\u...] +matches any character not in the class +c\d1\uc\d2\uc\d3\u... +.RE +.sp +Regular expressions are built up from other regular expressions +as follows: +.RS +.TP \w'[^c\d1\uc\d2\uc\d3\u...]'u+1n +\fIr\fR\d1\u\fIr\fR\d2\u +matches +\fIr\fR\d1\u +followed immediately by +\fIr\fR\d2\u +(concatenation). +.TP +\fIr\fR\d1\u | \fIr\fR\d2\u +matches +\fIr\fR\d1\u or +\fIr\fR\d2\u +(alternation). +.TP +\fIr\fR* +matches \fIr\fR repeated zero or more times. +.TP +\fIr\fR+ +matches \fIr\fR repeated one or more times. +.TP +\fIr\fR? +matches \fIr\fR zero or once. +.TP +(\fIr\fR) +matches \fIr\fR, providing grouping. +.RE +.sp +The increasing precedence of operators is alternation, +concatenation and +unary (*, + or ?). +.PP +For example, +.nf +.sp + /^[_a\-zA-Z][_a\-zA\-Z0\-9]*$/ and + /^[\-+]?([0\-9]+\e\|.?|\e\|.[0\-9])[0\-9]*([eE][\-+]?[0\-9]+)?$/ +.sp +.fi +are matched by AWK identifiers and AWK numeric constants +respectively. Note that . has to be escaped to be +recognized as a decimal point, and that metacharacters are not +special inside character classes. +.PP +Any expression can be used on the right hand side of the ~ or !~ +operators or +passed to a built-in that expects +a regular expression. +If needed, it is converted to string, and then interpreted +as a regular expression. For example, +.nf +.sp + BEGIN { identifier = "[_a\-zA\-Z][_a\-zA\-Z0\-9]*" } + + $0 ~ "^" identifier +.sp +.fi +prints all lines that start with an AWK identifier. 
+.PP +.B lmawk +recognizes the empty regular expression, //\|, which matches the +empty string and hence is matched by any string at the front, +back and between every character. For example, +.nf +.sp + echo abc | lmawk { gsub(//, "X") ; print } + XaXbXcX +.sp +.fi +.\" +.SS "\fB4. Records and fields" +Records are read in one at a time, and stored in the +.I field +variable +.BR $0 . +The record is split into +.I fields +which are stored in +.BR $1 , +.BR $2 ", ...," +.BR $NF . +The built-in variable +.B NF +is set to the number of fields, +and +.B NR +and +.B FNR +are incremented by 1. +Fields above +.B $NF +are set to "". +.PP +Assignment to +.B $0 +causes the fields and +.B NF +to be recomputed. +Assignment to +.B NF +or to a field +causes +.B $0 +to be reconstructed by +concatenating the +.B $i's +separated by +.BR OFS . +Assignment to a field with index greater than +.BR NF , +increases +.B NF +and causes +.B $0 +to be reconstructed. +.PP +Data input stored in fields +is string, unless the entire field has numeric +form and then the type is number and string. +For example, +.sp +.nf + echo 24 24E | + lmawk '{ print($1>100, $1>"100", $2>100, $2>"100") }' + 0 1 1 1 +.fi +.sp +.B $0 +and +.B $2 +are string and +.B $1 +is number and string. The first comparison is numeric, +the second is string, the third is string +(100 is converted to "100"), +and the last is string. +.\" +.SS "\fB5. Expressions and operators" +.PP +The expression syntax is +similar to C. Primary expressions are numeric constants, +string constants, variables, fields, arrays and function calls. +The identifier +for a variable, array or function can be a sequence of +letters, digits and underscores, that does +not start with a digit. +Variables are not declared; they exist when first referenced and +are initialized to +.IR null . +.PP +New +expressions are composed with the following operators in +order of increasing precedence. 
+.PP +.RS +.nf +.vs +2p \" open up a little +\fIassignment\fR = += \-= *= /= %= ^= +\fIconditional\fR ? : +\fIlogical or\fR || +\fIlogical and\fR && +\fIarray membership\fR \fBin +\fImatching\fR ~ !~ +\fIrelational\fR < > <= >= == != +\fIconcatenation\fR (no explicit operator) +\fIadd ops\fR + \- +\fImul ops\fR * / % +\fIunary\fR + \- +\fIlogical not\fR ! +\fIexponentiation\fR ^ +\fIinc and dec\fR ++ \-\|\- (both post and pre) +\fIfield\fR $ +.vs +.RE +.PP +.fi +Assignment, conditional and exponentiation associate right to +left; the other operators associate left to right. Any +expression can be parenthesized. +.\" +.SS "\fB6. Arrays" +.ds ae \fIarray\fR[\fIexpr\fR] +Awk provides one-dimensional arrays. Array elements are expressed +as \*(ae. +.I Expr +is internally converted to string type, so, for example, +A[1] and A["1"] are the same element and the actual +index is "1". +Arrays indexed by strings are called associative arrays. +Initially an array is empty; elements exist when first accessed. +An expression, +\fIexpr\fB in\fI array\fR +evaluates to 1 if +\*(ae +exists, else to 0. +.PP +There is a form of the +.B for +statement that loops over each index of an array. +.nf +.sp + \fBfor\fR ( \fIvar\fB in \fIarray \fR) \fIstatement\fR +.sp +.fi +sets +.I var +to each index of +.I array +and executes +.IR statement . +The order that +.I var +transverses the indices of +.I array +is not defined. +.PP +The statement, +.B delete +\*(ae, +causes +\*(ae +not to exist. +.B lmawk +supports an extension, +.B delete +.IR array , +which deletes all elements of +.IR array . +.PP +Multidimensional arrays are synthesized with concatenation using +the built-in variable +.BR SUBSEP . +\fIarray\fR[\fIexpr\fR\d1\u,\|\fIexpr\fR\d2\u] +is equivalent to +\fIarray\fR[\fIexpr\fR\d1\u \fBSUBSEP \fIexpr\fR\d2\u]. +Testing for a multidimensional element uses a parenthesized index, +such as +.sp +.nf + if ( (i, j) in A ) print A[i, j] +.fi +.sp +.\" +.SS "\fB7. 
Builtin-variables\fR" +.PP +The following variables are built-in and initialized before program +execution. +.RS +.TP \w'FILENAME'u+2n +.B ARGC +number of command line arguments. +.TP +.B ARGV +array of command line arguments, 0..ARGC-1. +.TP +.B CONVFMT +format for internal conversion of numbers to string, +initially = "%.6g". +.TP +.B ENVIRON +array indexed by environment variables. An environment string, +\fIvar=value\fR is stored as +\fBENVIRON\fR[\fIvar\fR] = +.IR value . +.TP +.B FILENAME +name of the current input file. +.TP +.B FNR +current record number in +.BR FILENAME . +.TP +.B FS +splits records into fields as a regular expression. +.TP +.B NF +number of fields in the current record. +.TP +.B NR +current record number in the total input stream. +.TP +.B OFMT +format for printing numbers; initially = "%.6g". +.TP +.B OFS +inserted between fields on output, initially = " ". +.TP +.B ORS +terminates each record on output, initially = "\en". +.TP +.B RLENGTH +length set by the last call to the built-in function, +.BR match() . +.TP +.B RS +input record separator, initially = "\en". +.TP +.B RSTART +index set by the last call to +.BR match() . +.TP +.B SUBSEP +used to build multiple array subscripts, initially = "\e034". +.TP +.B ERRNO +misc built-in functions (libmawk extensions) use this variable to +rerport error. All extension calls will set this variable before returning, +therefor ERRNO holds the result of the last call. An empty string value +means no error. Error messages are formatted in a way that the first word +is an unique integer, followed by a human readable error message from the +second word. int(ERRNO) can be used to acquire the error code, which then +can be used as a secondary output from the extension function. For example, +an awk program can use valueof() to determine if a global symbol exists and +is a function or a variable or anything else. +.TP +.B LIBPATH +is a semicolon separated list of search paths. 
When loading an awk script by file +name (-f command line argument or include from another awk script) these +paths are inserted before the file name, in order, one by one, until the first +path that allows opening the file. An empty path is equivalent to the current +working directory. LIBPATH can be modified from the command line using -v, as +arguments are scanned before loading the scripts. Setting LIBPATH to +empty string results in the original behaviour of mawk. LIBPATH is ignored +for script file names starting with slash ('/') as those are assumed to be +absolute paths. +.RE +.\" +.SS "\fB8. Built-in functions" +String functions +.RS +.TP +gsub(\fIr,s,t\fR) gsub(\fIr,s\fR) +Global substitution, every match of regular expression +.I r +in variable +.I t +is replaced by string +.IR s . +The number of replacements is returned. +If +.I t +is omitted, +.B $0 +is used. An & in the replacement string +.I s +is replaced by the matched substring of +.IR t . +\e& and \e\e put literal & and \e, respectively, +in the replacement string. +.TP +index(\fIs,t\fR) +If +.I t +is a substring of +.IR s , +then the position where +.I t +starts is returned, else 0 is returned. +The first character of +.I s +is in position 1. +.TP +length(\fIs\fR) +Returns the length of string +.IR s . +.TP +match(\fIs,r\fR) +Returns the index of the first longest match of regular expression +.I r +in string +.IR s . +Returns 0 if no match. +As a side effect, +.B RSTART +is set to the return value. +.B RLENGTH +is set to the length of the match or \-1 if no match. If the +empty string is matched, +.B RLENGTH +is set to 0, and 1 is returned if the match is at the front, and +length(\fIs\fR)+1 is returned if the match is at the back. +.TP +split(\fIs,A,r\fR) split(\fIs,A\fR) +String +.I s +is split into fields by regular expression +.I r +and the fields are loaded into array +.IR A . +The number of fields +is returned. See section 11 below for more detail. +If +.I r +is omitted, +.B FS +is used. 
+.TP +sprintf(\fIformat,expr-list\fR) +Returns a string constructed from +.I expr-list +according to +.IR format . +See the description of printf() below. +.TP +sub(\fIr,s,t\fR) sub(\fIr,s\fR) +Single substitution, same as gsub() except at most one substitution. +.TP +substr(\fIs,i,n\fR) substr(\fIs,i\fR) +Returns the substring of string +.IR s , +starting at index +.IR i , +of length +.IR n . +If +.I n +is omitted, the suffix of +.IR s , +starting at +.I i +is returned. +.TP +tolower(\fIs\fR) +Returns a copy of +.I s +with all upper case characters converted to lower case. +.TP +toupper(\fIs\fR) +Returns a copy of +.I s +with all lower case characters converted to upper case. +.RE +.PP +Arithmetic functions +.RS +.PP +.nf +atan2(\fIy,x\fR) Arctan of \fIy\fR/\fIx\fR between -PI and PI. +.PP +cos(\fIx\fR) Cosine function, \fIx\fR in radians. +.PP +exp(\fIx\fR) Exponential function. +.PP +int(\fIx\fR) Returns \fIx\fR truncated towards zero. +.PP +log(\fIx\fR) Natural logarithm. +.PP +rand() Returns a random number between zero and one. +.PP +sin(\fIx\fR) Sine function, \fIx\fR in radians. +.PP +sqrt(\fIx\fR) Returns square root of \fIx\fR. +.fi +.TP +srand(\fIexpr\fR) srand() +Seeds the random number generator, using the clock if +.I expr +is omitted, and returns the value of the previous seed. +.B lmawk +seeds the random number generator from the clock at startup +so there is no real need to call srand(). Srand(\fIexpr\fR) +is useful for repeating pseudo random sequences. +.RE +.PP +Misc functions (libmawk extensions) +.RS +.PP +.TP +call(\fIfname,arg1,arg2,...\fR) +Call awk function \fIfname\fR with the supplied arguments. If the call fails, +empty value, else the return value of the callee is returned. Built-in variable +ERRNO is always set. +.TP +acall(\fIfname,arrname\fR) +Call awk function \fIfname\fR with arguments supplied in array named \fIarrname\fR +(both arguments are strings naming an existing object). +The array should be indexed from 1. 
Number of arguments is determined by +looking for the first empty (non-existing) index in the array. If the call fails, +empty value, else the return value of the callee is returned. Built-in variable +ERRNO is always set. +.TP +valueof(\fIvname [,idx]\fR) +Return the value of variable \fIvname\fR; if the variable is an array, return +the element indexed by \fIidx\fR (which must be present in this case). If index +is not present or is empty (""), the variable is expected to be scalar. Built-in variable +ERRNO is always set. NOTE: valueof() has access to the global symbol table only. +It will fail to resolve anything else than global objects; most notably it +will fail on local variables, $ arguments and on most of the built-in variables. +.RE +.\" +.SS "\fB9. Input and output" +There are two output statements, +.B print +and +.BR printf . +.RS +.TP +print +writes +.B "$0 ORS" +to standard output. +.TP +print \*(ex\d1\u, \*(ex\d2\u, ..., \*(ex\dn\u +writes +\*(ex\d1\u \fBOFS \*(ex\d2\u \fBOFS\fR ... \*(ex\dn\u +.B ORS +to standard output. Numeric expressions are converted to +string with +.BR OFMT . +.TP +printf \fIformat, expr-list\fR +duplicates the printf C library function writing to standard output. +The complete ANSI C format specifications are recognized with +conversions %c, %d, %e, %E, %f, %g, %G, +%i, %o, %s, %u, %x, %X and %%, +and conversion qualifiers h and l. +.RE +.PP +The argument list to print or printf can optionally be enclosed in +parentheses. +Print formats numbers using +.B OFMT +or "%d" for exact integers. +"%c" with a numeric argument prints the corresponding 8 bit +character, with a string argument it prints the first character of +the string. +The output of print and printf can be redirected to a file or +command by appending > +.IR file , +>> +.I file +or +| +.I command +to the end of the print statement. +Redirection opens +.I file +or +.I command +only once, subsequent redirections append to the already open stream.
+By convention, +.B lmawk +associates the filename "/dev/stderr" with stderr which allows +print and printf to be redirected to stderr. +.B lmawk +also associates "\-" and "/dev/stdout" with stdin and stdout which +allows these streams to be passed to functions. +Opening /dev/fd/N will do an fdopen() on file descriptor N, where +N is an integer - this is a libmawk extension. If any of the +/dev heuristics needs to be bypassed (i.e. the script wants to +open the real /dev/stdout or the real /dev/fd/5), the leading +slash should be doubled (e.g. //dev/fd/5). +.PP +The input function +.B getline +has the following variations. +.RS +.TP +getline +reads into +.BR $0 , +updates the fields, +.BR NF , +.B NR +and +.BR FNR . +.TP +getline < \fIfile\fR +reads into +.B $0 +from \fIfile\fR, +updates the fields and +.BR NF . +.TP +getline \fIvar +reads the next record into +.IR var , +updates +.B NR +and +.BR FNR . +.TP +getline \fIvar\fR < \fIfile +reads the next record of +.I file +into +.IR var . +.TP +\fI command\fR | getline +pipes a record from +.I command +into +.B $0 +and updates the fields and +.BR NF . +.TP +\fI command\fR | getline \fIvar +pipes a record from +.I command +into +.IR var . +.RE +.PP +Getline returns 0 on end-of-file, \-1 on error, otherwise 1. +.PP +Commands on the end of pipes are executed by /bin/sh. +.PP +The function \fBclose\fR(\*(ex) closes the file or pipe +associated with +.IR expr . +Close returns 0 if +.I expr +is an open file, +the exit status if +.I expr +is a piped command, and \-1 otherwise. +Close is used to reread a file or command, make sure the other +end of an output pipe is finished or conserve file resources. +.PP +The function \fBfflush\fR(\*(ex) flushes the output file or pipe +associated with +.IR expr . +Fflush returns 0 if +.I expr +is an open output stream else \-1. +Fflush without an argument flushes stdout. +Fflush with an empty argument ("") flushes all open output. 
+.PP +The function +\fBsystem\fR(\fIexpr\fR) +uses +/bin/sh +to execute +.I expr +and returns the exit status of the command +.IR expr . +Changes made to the +.B ENVIRON +array are not passed to commands executed with +.B system +or pipes. +.SS \fB10. User defined functions +The syntax for a user defined function is +.nf +.sp + \fBfunction\fR name( \fIargs\fR ) { \fIstatements\fR } +.sp +.fi +The function body can contain a return statement +.nf +.sp + \fBreturn\fI opt_expr\fR +.sp +.fi +A return statement is not required. +Function calls may be nested or recursive. +Functions are passed expressions by value +and arrays by reference. +Extra arguments serve as local variables +and are initialized to +.IR null . +For example, csplit(\fIs,\|A\fR) puts each character of +.I s +into array +.I A +and returns the length of +.IR s . +.nf +.sp + function csplit(s, A, n, i) + { + n = length(s) + for( i = 1 ; i <= n ; i++ ) A[i] = substr(s, i, 1) + return n + } +.sp +.fi +Putting extra space between passed arguments and local +variables is conventional. +Functions can be referenced before they are defined, but the +function name and the '(' of the arguments must touch to +avoid confusion with concatenation. +.\" +.SS "\fB11. Splitting strings, records and files" +Awk programs use the same algorithm to +split strings into arrays with split(), and records into fields +on +.BR FS . +.B lmawk +uses essentially the same algorithm to split files into +records on +.BR RS . +.PP +Split(\fIexpr,\|A,\|sep\fR) works as follows: +.RS +.TP +(1) +If +.I sep +is omitted, it is replaced by +.BR FS . +.I Sep +can be an expression or regular expression. If it is an +expression of non-string type, it is converted to string. +.TP +(2) +If +.I sep += " " (a single space), +then <SPACE> is trimmed from the front and back of +.IR expr , +and +.I sep +becomes <SPACE>. +.B lmawk +defines <SPACE> as the regular expression +/[\ \et\en]+/.
+Otherwise +.I sep +is treated as a regular expression, except that meta-characters +are ignored for a string of length 1, +e.g., +split(x, A, "*") and split(x, A, /\e*/) are the same. +.TP +(3) +If \*(ex is not string, it is converted to string. +If \*(ex is then the empty string "", split() returns 0 +and +.I A +is set empty. +Otherwise, +all non-overlapping, non-null and longest matches of +.I sep +in +.IR expr , +separate +.I expr +into fields which are loaded into +.IR A . +The fields are placed in +A[1], A[2], ..., A[n] and split() returns n, the number +of fields which is the number +of matches plus one. +Data placed in +.I A +that looks numeric is typed number and string. +.RE +.PP +Splitting records into fields works the same except the +pieces are loaded into +.BR $1 , +\fB$2\fR,..., +.BR $NF . +If +.B $0 +is empty, +.B NF +is set to 0 and all +.B $i +to "". +.PP +.B lmawk +splits files into records by the same algorithm, but with the +slight difference that +.B RS +is really a terminator instead of a separator. +(\fBORS\fR is really a terminator too). +.RS +.PP +E.g., if +.B FS += ":+" and +.B $0 += "a::b:" , then +.B NF += 3 and +.B $1 += "a", +.B $2 += "b" and +.B $3 += "", but +if "a::b:" is the contents of an input file and +.B RS += ":+", then +there are two records "a" and "b". +.RE +.PP +.B RS += " " is not special. +.PP +If +.B FS += "", then +.B lmawk +breaks the record into individual characters, and, similarly, +split(\fIs,A,\fR"") places the individual characters of +.I s +into +.IR A . +.\" +.SS "\fB12. Multi-line records" +Since +.B lmawk +interprets +.B RS +as a regular expression, multi-line +records are easy. Setting +.B RS += "\en\en+", makes one or more blank +lines separate records. If +.B FS += " " (the default), then single +newlines, by the rules for <SPACE> above, become space and +single newlines are field separators.
+.RS +.PP +For example, if a file is "a\ b\enc\en\en", +.B RS += "\en\en+" and +.B FS += "\ ", then there is one record "a\ b\enc" with three +fields "a", "b" and "c". Changing +.B FS += "\en", gives two +fields "a b" and "c"; changing +.B FS += "", gives one field +identical to the record. +.RE +.PP +If you want lines with spaces or tabs to be considered blank, +set +.B RS += "\en([\ \et]*\en)+". +For compatibility with other awks, setting +.B RS += "" has the same +effect as if blank lines are stripped from the +front and back of files and then records are determined as if +.B RS += "\en\en+". +Posix requires that "\en" always separates records when +.B RS += "" regardless of the value of +.BR FS . +.B lmawk +does not support this convention, because defining +"\en" as <SPACE> makes it unnecessary. +.\" +.PP +Most of the time when you change +.B RS +for multi-line records, you +will also want to change +.B ORS +to "\en\en" so the record spacing is preserved on output. +.\" +.SS "\fB13. Program execution" +This section describes the order of program execution. +First +.B ARGC +is set to the total number of command line arguments passed to +the execution phase of the program. +.B ARGV[0] +is set to the name of the AWK interpreter and +\fBARGV[1]\fR ... +.B ARGV[ARGC-1] +holds the remaining command line arguments exclusive of +options and program source. +For example with +.nf +.sp + lmawk \-f prog v=1 A t=hello B +.sp +.fi +.B ARGC += 5 with +.B ARGV[0] += "lmawk", +.B ARGV[1] += "v=1", +.B ARGV[2] += "A", +.B ARGV[3] += "t=hello" and +.B ARGV[4] += "B". +.PP +Next, each +.B BEGIN +block is executed in order. +If the program consists +entirely of +.B BEGIN +blocks, then execution terminates, else +an input stream is opened and execution continues. +If +.B ARGC +equals 1, +the input stream is set to stdin, +else the command line arguments +.BR ARGV[1] " ... +.B ARGV[ARGC-1] +are examined for a file argument.
+.PP +The command line arguments divide into three sets: +file arguments, assignment arguments and empty strings "". +An assignment has the form +\fIvar\fR=\fIstring\fR. +When an +.B ARGV[i] +is examined as a possible file argument, +if it is empty it is skipped; +if it is an assignment argument, the assignment to +.I var +takes place and +.B i +skips to the next argument; +else +.B ARGV[i] +is opened for input. +If it fails to open, execution terminates with exit code 2. +If no command line argument is a file argument, then input +comes from stdin. +Getline in a +.B BEGIN +action opens input. "\-" as a file argument denotes stdin. +.PP +Once an input stream is open, each input record is tested +against each +.IR pattern , +and if it matches, the associated +.I action +is executed. +An expression pattern matches if it is boolean true (see +the end of section 2). +A +.B BEGIN +pattern matches before any input has been read, and +an +.B END +pattern matches after all input has been read. +A range pattern, +\fIexpr\fR1,\|\fIexpr\fR2 , +matches every record between the match of +.IR expr 1 +and the match +.IR expr 2 +inclusively. +.PP +When end of file occurs on the input stream, the remaining +command line arguments are examined for a file argument, and +if there is one it is opened, else the +.B END +.I pattern +is considered matched +and all +.B END +.I actions +are executed. +.PP +In the example, the assignment +v=1 +takes place after the +.B BEGIN +.I actions +are executed, and +the data placed in +v +is typed number and string. +Input is then read from file A. +On end of file A, +t +is set to the string "hello", +and B is opened for input. +On end of file B, the +.B END +.I actions +are executed. +.PP +Program flow at the +.I pattern +.I {action} +level can be changed with the +.nf +.sp + \fBnext + \fBexit \fIopt_expr\fR +.sp +.fi +statements. 
+A +.B next +statement +causes the next input record to be read and pattern testing +to restart with the first +.I "pattern {action}" +pair in the program. +An +.B exit +statement +causes immediate execution of the +.B END +actions or program termination if there are none or +if the +.B exit +occurs in an +.B END +action. +The +.I opt_expr +sets the exit value of the program unless overridden by +a later +.B exit +or subsequent error. + +.SS "\fB14. include" + +.nf +libmawk introduces source inclusion feature. Syntax is: + + include "filename" + +Include statements must be on top level (outside of blocks). If file name +starts with a plus sign ('+'), the script file is not loaded if it has +been already loaded (by another include or -f command line argument). + + +.SH EXAMPLES +.nf +1. emulate cat. + + { print } + +2. emulate wc. + + { chars += length($0) + 1 # add one for the \en + words += NF + } + + END{ print NR, words, chars } + +3. count the number of unique "real words". + + BEGIN { FS = "[^A-Za-z]+" } + + { for(i = 1 ; i <= NF ; i++) word[$i] = "" } + + END { delete word[""] + for ( i in word ) cnt++ + print cnt + } + +.fi +4. sum the second field of +every record based on the first field. +.nf + + $1 ~ /credit\||\|gain/ { sum += $2 } + $1 ~ /debit\||\|loss/ { sum \-= $2 } + + END { print sum } + +5. sort a file, comparing as string + + { line[NR] = $0 "" } # make sure of comparison type + # in case some lines look numeric + + END { isort(line, NR) + for(i = 1 ; i <= NR ; i++) print line[i] + } + + #insertion sort of A[1..n] + function isort( A, n, i, j, hold) + { + for( i = 2 ; i <= n ; i++) + { + hold = A[j = i] + while ( A[j\-1] > hold ) + { j\-\|\- ; A[j+1] = A[j] } + A[j] = hold + } + # sentinel A[0] = "" will be created if needed + } + +.fi +.SH "COMPATIBILITY ISSUES" +The Posix 1003.2(draft 11.3) definition of the AWK language +is AWK as described in the AWK book with a few extensions +that appeared in SystemVR4 nawk. 
The extensions are: +.sp +.RS +New functions: toupper() and tolower(); libmawk extensions: call(), acall(), valueof(). + +New variables: ENVIRON[\|] and CONVFMT; libmawk extension: ERRNO, LIBPATH. +As a libmawk extension, ENVIRON affects the environment of child processes. + +As a libmawk extension, new built-in variable LIBPATH is used as a list +of search paths while loading scripts from the command line or from include. + +If a script name starts with plus ('+'), the file is not loaded if it has +been loaded earlier (to avoid double loading libs through -f and/or include). +This is a libmawk extension. + +It is possible to include a script from another script using keyword +include "scriptname.awk" (libmawk extension). + +ANSI C conversion specifications for printf() and sprintf(). + +New command options: \-v var=value, multiple -f options and +implementation options as arguments to \-W. +.RE +.sp + +Posix AWK is oriented to operate on files a line at +a time. +.B RS +can be changed from "\en" to another single character, +but it +is hard to find any use for this \(em there are no +examples in the AWK book. +By convention, \fBRS\fR = "", makes one or more blank lines +separate records, allowing multi-line records. When +\fBRS\fR = "", "\en" is always a field separator +regardless of the value in +.BR FS . +.PP +.BR lmawk , +on the other hand, +allows +.B RS +to be a regular expression. +When "\en" appears in records, it is treated as space, and +.B FS +always determines fields. +.PP +Removing the line at a time paradigm can make some programs +simpler and can +often improve performance. For example, +redoing example 3 from above, +.nf +.sp + BEGIN { RS = "[^A-Za-z]+" } + + { word[ $0 ] = "" } + + END { delete word[ "" ] + for( i in word ) cnt++ + print cnt + } +.sp +.fi +counts the number of unique words by making each word a record. +On moderate size files, +.B lmawk +executes twice as fast, because of the simplified inner loop.
+.PP +The following program replaces each comment by a single space in +a C program file, +.nf +.sp + BEGIN { + RS = "/\|\e*([^*]\||\|\e*+[^/*])*\e*+/" + # comment is record separator + ORS = " " + getline hold + } + + { print hold ; hold = $0 } + + END { printf "%s" , hold } +.sp +.fi +Buffering one record is needed to avoid terminating the last +record with a space. +.PP +With +.BR lmawk , +the following are all equivalent, +.nf +.sp + x ~ /a\e+b/ x ~ "a\e+b" x ~ "a\e\e+b" +.sp +.fi +The strings get scanned twice, once as string and once as +regular expression. On the string scan, +.B lmawk +ignores the escape on non-escape characters while the AWK +book advocates +.I \ec +be recognized as +.I c +which necessitates the double escaping of meta-characters in +strings. +Posix explicitly declines to define the behavior which passively +forces programs that must run under a variety of awks to use +the more portable but less readable, double escape. +.PP +Posix AWK does not recognize "/dev/std{out,err}" or \ex hex escape +sequences in strings. Unlike ANSI C, +.B lmawk +limits the number of digits that follows \ex to two as the current +implementation only supports 8 bit characters. +The built-in +.B fflush +first appeared in a recent (1993) AT&T awk released to netlib, and is +not part of the posix standard. Aggregate deletion with +.B delete +.I array +is not part of the posix standard. +.PP +Posix explicitly leaves the behavior of +.B FS += "" undefined, and mentions splitting the record into characters as +a possible interpretation, but currently this use is not portable +across implementations. +.PP +Finally, here is how +.B lmawk +handles exceptional cases not discussed in the +AWK book or the Posix draft. It is unsafe to assume +consistency across awks and safe to skip to +the next section. +.PP +.RS +substr(s, i, n) returns the characters of s in the intersection +of the closed interval [1, length(s)] and the half-open interval +[i, i+n). 
When this intersection is empty, the empty string is +returned; so substr("ABC", 1, 0) = "" and +substr("ABC", \-4, 6) = "A". +.PP +Every string, including the empty string, matches the empty string +at the +front so, s ~ // and s ~ "", are always 1 as is match(s, //) and +match(s, ""). The last two set +.B RLENGTH +to 0. +.PP +index(s, t) is always the same as match(s, t1) where t1 is the +same as t with metacharacters escaped. Hence consistency +with match requires that +index(s, "") always returns 1. +Also the condition, index(s,t) != 0 if and only if t is a substring +of s, requires index("","") = 1. +.PP +If getline encounters end of file, getline var, leaves var +unchanged. Similarly, on entry to the +.B END +actions, +.BR $0 , +the fields and +.B NF +have their value unaltered from the last record. +.SH SEE ALSO +.IR egrep (1), +.IR mawk (1) +.PP +Aho, Kernighan and Weinberger, +.IR "The AWK Programming Language" , +Addison-Wesley Publishing, 1988, (the AWK book), +defines the language, opening with a tutorial +and advancing to many interesting programs that delve into +issues of software design and analysis relevant to programming +in any language. +.PP +.IR "The GAWK Manual" , +The Free Software Foundation, 1991, is a tutorial +and language reference +that does not attempt the depth of the AWK book +and assumes the reader may be a novice programmer. +The section on AWK arrays is excellent. It also +discusses Posix requirements for AWK. +.SH BUGS +.B lmawk +cannot handle ascii NUL \e0 in the source or data files. You +can output NUL using printf with %c, and any other 8 bit +character is acceptable input. +.PP +.B lmawk +implements printf() and sprintf() using the C library functions, +printf and sprintf, so full ANSI compatibility requires an ANSI +C library. In practice this means the h conversion qualifier may +not be available. Also +.B lmawk +inherits any bugs or limitations of the library functions.
+.PP +Implementors of the AWK language have shown a consistent lack +of imagination when naming their programs. +.SH AUTHOR +.PP +.B mawk: +Mike Brennan (brennan@whidbey.com). +.PP +.B libmawk extensions: +Tibor Palinkas (libmawk@igor2.repo.hu). diff --git a/src/libmawk/math_wrap.c b/src/libmawk/math_wrap.c new file mode 100644 index 0000000..d090d1d --- /dev/null +++ b/src/libmawk/math_wrap.c @@ -0,0 +1,61 @@ +/******************************************** +libmawk (C) 2009-2014, Tibor 'Igor2' Palinkas; + +This is a source file for libmawk, an implementation of +the AWK programming language, fork of mawk. + +Libmawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#include +#include +#include "math_wrap.h" + +int PM_errno; + +double P_log_(double x, int *perrno) +{ + double ret; +#ifdef P_MBROKEN_LOG_M_0 + if (x == -0.0) { + *perrno = -1; + return 0; + } +#endif + +#ifdef P_MBROKEN_LOG_P_0 + if (x == +0.0) { + *perrno = -1; + return 0; + } +#endif + +#ifdef P_MBROKEN_LOG_P_1 + if (x == +1.0) { + *perrno = -1; + return 0; + } +#endif + +/* we don't have portable NaN, scconfig doesn't yet detect FP_NAN, we + have to emulate one by hand */ + if (x < 0) { + *perrno = -123; + return 0; + } + ret = log(x); + if (errno != 0) + *perrno = errno; + return ret; +} + + +double P_divf_(double x, double y, int *perrno) +{ + if (y == 0) { + *perrno = -123; + return 0; + } + return x/y; +} diff --git a/src/libmawk/math_wrap.h b/src/libmawk/math_wrap.h new file mode 100644 index 0000000..72ad5da --- /dev/null +++ b/src/libmawk/math_wrap.h @@ -0,0 +1,77 @@ +/* fallback for the case when the P_ math funcitons are used outside of + PM_ macros; not thread safe, should be avoided + + Options: + + 1. single thread + a. use PM_BEGIN/PM_ERROR/PM_END + b. use P_log() and manually check PM_errno after the call + 2. multithread + a. use PM_BEGIN/PM_ERROR/PM_END + b. 
use P_log_() with local variable errno, manually check it after the call + + Doesn't protect against 1/0 - use P_divf() for secure division. +*/ + +#ifndef MAWK_MATH_WRAP +#define MAWK_MATH_WRAP + +#include "num.h" + +extern int PM_errno; + +#define P_EFPE -1234 + +#define PM_BEGIN \ + { \ + int PM_errno = 0; \ + +#define PM_ERROR \ + ; \ + if (PM_errno != 0) + +#define PM_END \ + ;\ + } \ + +#define PM_ENDERR(errhandling) \ + ; \ + if (PM_errno != 0) { errhandling ; } \ + } + + +double P_log_(double x, int *perrno); + +#define P_log(x) P_log_(x, &PM_errno) +#define P_divf(x, y) P_divf_(x, y, &PM_errno) + +#ifdef MAWK_NO_FLOAT +/* integer variant - no nan */ +#define P_nansafe1(dest, operation, operand) \ + (dest) = (operation); \ + +#else +#ifdef MAWK_HAVE_SAFE_NAN + /* proper NAN support - don't do extra checks */ +# define P_nansafe1(dest, operation, operand) \ + (dest) = (operation); \ + +# define P_isnan_manual(x) 0 + +#else + /* broken or missing NAN support, always have to check manually */ +# define P_nansafe1(dest, operation, operand) \ + do { \ + if (P_isnan(operand) || P_isnan(dest)) (dest) = P_nan(); \ + else (dest) = (operation); \ + } while(0) +# define P_isnan_manual P_isnan +#endif + + +#endif + +/* forced check - for both cases */ +#define P_nansafe_exp1(exp, operand) (P_isnan(operand) ? P_nan() : (exp)) + +#endif diff --git a/src/libmawk/matherr.c b/src/libmawk/matherr.c new file mode 100644 index 0000000..c148fa6 --- /dev/null +++ b/src/libmawk/matherr.c @@ -0,0 +1,242 @@ + +/******************************************** +mawk_matherr.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ +#include "mawk.h" +#include "num.h" + +struct exception { + int dummy; +}; + +/* Sets up NetBSD 1.0A for ieee floating point */ +#if defined(_LIB_VERSION_TYPE) && defined(_LIB_VERSION) && defined(_IEEE_) +_LIB_VERSION_TYPE _LIB_VERSION = _IEEE_; +#endif + +#ifdef USE_IEEEFP_H +#include +#endif + +#ifndef TURN_OFF_FPE_TRAPS +#define TURN_OFF_FPE_TRAPS() /* nothing */ +#endif + +#ifndef TURN_ON_FPE_TRAPS +#define TURN_ON_FPE_TRAPS() /* nothing */ +#endif + +#ifdef SV_SIGINFO +#include +#define FPE_ZERODIVIDE FPE_FLTDIV +#define FPE_OVERFLOW FPE_FLTOVF +#endif + +#ifdef FPE_TRAPS_ON +#include + +/* machine dependent changes might be needed here */ + +#ifdef SV_SIGINFO +static void fpe_catch(int signal, siginfo_t *sip) +{ + int why = sip->si_code; + +#else + +static void fpe_catch(int signal, int why) +{ +#endif /* SV_SIGINFO */ + +#if NOINFO_SIGFPE + mawk_rt_error("floating point exception, probably mawk_overflow"); + /* does not return */ +#else + + switch (why) { + case FPE_ZERODIVIDE: + mawk_rt_error("division by zero"); + + case FPE_OVERFLOW: + mawk_rt_error("floating point mawk_overflow"); + + default: + mawk_rt_error("floating point exception"); + } +#endif /* noinfo_sigfpe */ +} + +void mawk_fpe_init(void) +{ + return; + TURN_ON_FPE_TRAPS(); + +#ifndef SV_SIGINFO + signal(SIGFPE, fpe_catch); + +#else + { + struct sigaction x; + + memset(&x, 0, sizeof(x)); + x.sa_handler = fpe_catch; + x.sa_flags = SA_SIGINFO; + + sigaction(SIGFPE, &x, (struct sigaction *) 0); + } +#endif + +#ifdef HAVE_STRTOD_OVF_BUG + /* we've already turned the traps on */ + working_mask = fpgetmask(); + MAWK->entry_mask = working_mask & ~FP_X_DZ & ~FP_X_OFL; +#endif +} + +#else /* FPE_TRAPS not defined */ + +void mawk_fpe_init(void) +{ + return; + TURN_OFF_FPE_TRAPS(); +} +#endif + +#ifndef NO_MATHERR + +#ifndef FPE_TRAPS_ON + +/* If we are not trapping math errors, we will shutup the library calls +*/ + +int mawk_matherr(struct exception *e) 
+{ + return 1; +} + +#else /* print error message and exit */ + +int mawk_matherr(struct exception *e) +{ + char *error; + + switch (e->type) { + case DOMAIN: + case SING: + error = "domain error"; + break; + + case OVERFLOW: + error = "mawk_overflow"; + break; + + case TLOSS: + case PLOSS: + error = "loss of significance"; + break; + + case UNDERFLOW: + e->retval = MAWK_NUM_ZERO; + return 1; /* ignore it */ + } + + if (strcmp(e->name, "atan2") == 0) + mawk_rt_error("atan2(%g,%g) : %s", e->arg1, e->arg2, error); + else + mawk_rt_error("%s(%g) : %s", e->name, e->arg1, error); + + /* won't get here */ + return 0; +} +#endif /* FPE_TRAPS_ON */ + +#endif /* ! no mawk_matherr */ + + +/* this is how one gets the libm calls to do the right +thing on bsd43_vax */ + +#ifdef BSD43_VAX + +#include + +double infnan(int arg) +{ + switch (arg) { + case ERANGE: + errno = ERANGE; + return HUGE; + case -ERANGE: + errno = EDOM; + return -HUGE; + default: + errno = EDOM; + } + return MAWK_NUM_ZERO; +} + +#endif /* BSD43_VAX */ + +/* This routine is for XENIX-68K 2.3A. + Error check routine to be called after fp arithmetic. 
+*/ + +#if SW_FP_CHECK +/* Definitions of bit values in iserr() return value */ + +#define OVFLOW 2 +#define UFLOW 4 +#define ZERODIV 8 +#define OVFLFIX 32 +#define INFNAN 64 + +void fpcheck(void) +{ + register int fperrval; + char *errdesc; + + if ((fperrval = iserr()) == 0) + return; /* no error */ + + errdesc = (char *) 0; + + if (fperrval & INFNAN) + errdesc = "arg is infinity or NAN"; + else if (fperrval & ZERODIV) + errdesc = "division by zero"; + else if (fperrval & OVFLOW) + errdesc = "mawk_overflow"; + else if (fperrval & UFLOW); /* ignored */ + + if (errdesc) + mawk_rt_error("%s", errdesc); +} + +#endif + +#ifdef HAVE_STRTOD_OVF_BUG +/* buggy strtod in solaris, probably any sysv with ieee754 + strtod can generate an fpe */ + +double strtod_with_ovf_bug(const char *s, char **ep) +{ + double ret; + + fpsetmask(MAWK->entry_mask); /* traps off */ +#undef strtod /* make real strtod visible */ + ret = strtod(s, ep); + fpsetmask(working_mask); /* traps on */ + return ret; +} +#endif diff --git a/src/libmawk/mawk.h b/src/libmawk/mawk.h new file mode 100644 index 0000000..8fadb57 --- /dev/null +++ b/src/libmawk/mawk.h @@ -0,0 +1,578 @@ + +/******************************************** +mawk.h + +libmawk changes (C) 2009-2012, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991-94, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +#ifndef MAWK_H +#define MAWK_H + +#include +#include +#include + +typedef struct mawk_vio_s mawk_vio_t; + + +typedef enum mawk_errno_e { + MAWK_ESUCCES = 0, + MAWK_ECANTOPEN = -1, + MAWK_EHDRSIZE = -2, + MAWK_EFILEMAGIC = -3, + MAWK_EBYTEORDER = -4, + MAWK_EVERSION = -5, + MAWK_EINSTSIZE = -6, + MAWK_ENUMSIZE = -7, + MAWK_EALLOC = -8, + MAWK_EWRONGVAL = -9, + + MAWK_Elast = -9 +} mawk_errno_t; +const char *mawk_strerror(mawk_errno_t err); + +typedef struct mawk_state_s mawk_state_t; +typedef struct fcall FCALL_REC; + +#include +#include +#include + +#define NUM_PFIELDS 5 + +#define SPRINTF_SZ sizeof(MAWK->tempbuff) + +typedef struct pfile { + struct pfile *link; + const char *fname; + char bytecode; /* 1 if file is expected to be bytecode */ +} PFILE; + +typedef struct mawk_fin_s mawk_input_t; + +typedef struct array *mawk_array_t; + +/* array implementation callbacks; default implementation is in array_orig.c */ + +/* look up index mawk_cell_t in mawk_array_t and return 1 if it exists. If result is non-NULL, + it is first destroyed (regardless of whether the index exists in the array) + and if the index exists, its value is copied into result. The caller has to + destroy result after the call. Modifications to result will not affect the array. + NOTE: idx may be the same pointer as result: it's guaranteed that result is + destroyed after indexing. 
+ + If create is 1, a non-existing member is created with empty value + */ +typedef int mawk_array_find_t(mawk_state_t *MAWK, mawk_array_t arr, const mawk_cell_t *idx, mawk_cell_t *result, int create); + +/* set a member of the mawk_array_t at idx to val */ +typedef void mawk_array_set_t(mawk_state_t *MAWK, mawk_array_t arr, const mawk_cell_t *idx, mawk_cell_t *val); + +/* delete a single index (mawk_cell_t) from the array; called by "delete A[i]" */ +typedef void mawk_array_delete_t(mawk_state_t *MAWK, mawk_array_t arr, const mawk_cell_t *idx); + +/* free all elements of the array and clean the array; the array should + be a valid empty array after the operation */ +typedef void mawk_array_clear_t(mawk_state_t *MAWK, mawk_array_t arr); + +/* list and return all indices in a string array; used by "for(a in ARR)" + must return a pointer alloced by mawk_malloc() or NULL if the array is empty */ +typedef mawk_string_t **mawk_array_loop_vector_t(mawk_state_t *MAWK, mawk_array_t arr, unsigned *vsize); + +/* load the array from MAWK->split_ov_list (called exclusively from split()) */ +typedef void mawk_array_load_t(mawk_state_t *MAWK, mawk_array_t arr, int cnt); + +/* optional call for _generic: iteration; no change is done to the array during + the iteration, except for deleting the last returned element in clear() */ +/* start iterating over all members, returns iterator */ +typedef void *mawk_array_it_start_t(mawk_state_t *MAWK, mawk_array_t arr); +/* get index of the next member; returns NULL at the end */ +typedef const mawk_cell_t *mawk_array_it_next_t(mawk_state_t *MAWK, mawk_array_t arr, void *iterator); +/* called after the last element or if the iteration is to be stopped */ +typedef void mawk_array_it_stop_t(mawk_state_t *MAWK, mawk_array_t arr, void *iterator); + + +/* a whole implementation conists of all of the above: */ +typedef struct array_imp_s { + /* any array implementation has to provide these: */ + mawk_array_find_t *find; + mawk_array_set_t *set; + 
mawk_array_delete_t *delet; + /* option A: manual implementation of these: */ + mawk_array_clear_t *clear; + mawk_array_loop_vector_t *loop_vect; + mawk_array_load_t *load; + /* option B: use array_generic and provide an iterator: */ + mawk_array_it_start_t *it_start; + mawk_array_it_next_t *it_next; + mawk_array_it_stop_t *it_stop; +} array_imp_t; + +/* an actual array */ +struct array { + array_imp_t imp; /* implementation */ + PTR ptr; /* What this points to depends on the type and implementation */ + unsigned size; /* number of elts in the table */ + unsigned limit; /* Meaning depends on type and implementation */ + unsigned hmask; /* bitwise and with hash value to get table index */ + short type; /* values in AY_NULL .. AY_SPLIT */ + + union { /* state for custom (non-orig) impelementations; 0;NULL by default */ + int i; + void *p; + } state; +}; + +typedef struct { + INST *base, *limit, *warn, *ptr; +} CODEBLOCK; + +/*------------------------ + user defined functions + ------------------------*/ + +typedef struct fblock FBLOCK; + +struct fblock { + const char *name; + INST *code; + unsigned size; /* allocated size for proper cleanup with zfree() */ + unsigned short nargs; + char *typev; /* array of size nargs holding types */ + + FBLOCK *c_next; /* linked list of all c function call blocks compiled into the code so they can be free'd at the end; see also: MAWK->c_calls */ +}; /* function block */ + +typedef struct jmp { + struct jmp *link; + int source_offset; +} JMP; + +typedef struct bc { + struct bc *link; /* stack as linked list */ + int type; /* 'B' or 'C' or mark start with 0 */ + int source_offset; /* position of _JMP */ +} BC; + +/* a stack to hold some pieces of code while + reorganizing loops . 
+*/ + +typedef struct mc { /* mc -- move code */ + struct mc *link; + INST *code; /* the save code */ + unsigned len; /* its length */ + int scope; /* its scope */ + int move_level; /* size of this stack when coded */ + FBLOCK *fbp; /* if scope FUNCT */ + int offset; /* distance from its code base */ +} MC; + +struct child { + int pid; + int exit_status; + struct child *link; +}; + +#define SAFETY 16 +#define DANGER (EVAL_STACK_SIZE-SAFETY) + +#define ET_END 9 +typedef struct { + char in, out; +} mawk_escape_t; + +struct mawk_fdump { + struct mawk_fdump *link; + FBLOCK *fbp; +}; + +/* We store dynamically created files on a linked linear + list with move to the front (big surprise) */ + +typedef struct file_node_s { + struct file_node_s *link; + mawk_string_t *name; + short type; + + /* direct file IO for output or direct input */ + mawk_vio_t *vf; + + /* .. or buffered (FIN) */ + mawk_input_t *fin; +} FILE_NODE; + +typedef struct { + char type; + char c; + PTR ptr; /* mawk_string_t* or RE machine* */ +} SEPARATOR; + +/* struct to hold info about builtins */ +typedef struct { + char *name; + PF_CP fp; /* ptr to function that does the builtin */ + unsigned char min_args, max_args; +/* info for parser to check correct number of arguments */ +} BI_REC; + +typedef struct { + const char *name; + char type; + unsigned char offset; /* offset in stack frame for local vars */ + union { + mawk_cell_t *cp; + int kw; + PF_CP fp; + const BI_REC *bip; + mawk_array_t array; + FBLOCK *fbp; + struct { + mawk_cell_t *(*callback) (mawk_state_t * context, mawk_cell_t * sp, int a_args); + void *func_userdata; + } c_function; + } stval; +} SYMTAB; + +typedef struct hash { + struct hash *link; + SYMTAB symtab; +} HASHNODE; + +#define POOLSZ 16 +#define ZBLOCKSZ 8 +#define ZSHIFT 3 +typedef union zblock { + char dummy[ZBLOCKSZ]; + union zblock *link; +} ZBLOCK; + +/* ZBLOCKS of sizes 1, 2, ... 16 + which is bytes of sizes 8, 16, ... 
, 128 + are stored on the linked linear lists in + pool[0], pool[1], ... , pool[15] +*/ + +typedef struct re_node { + mawk_string_t *sval; + PTR re; + struct re_node *link; +} RE_NODE; + +typedef struct repl_node { + struct repl_node *link; + mawk_string_t *sval; /* the input */ + mawk_cell_t *cp; /* the output */ +} REPL_NODE; + +typedef struct spov { + struct spov *link; + mawk_string_t *sval; +} SPLIT_OV; + +/* ---------------------------------------------------------------------- */ + +typedef struct mawk_debug_callstack_s mawk_debug_callstack_t; + +struct mawk_debug_callstack_s { + FBLOCK *f; + mawk_debug_callstack_t *next; +}; + +typedef struct mawk_parse_state_s { + const char *pfile_name; /* program input file */ + char pfile_bytecode; /* 1 if program input file is expected to be bytecode */ + int code_move_level; /* used as part of unique identification of context when moving code. Global for communication with parser. */ + mawk_string_t *program_string; + unsigned char *buffer; + unsigned char *buffp; + /* unsigned so it works with 8 bit chars */ + FILE_NODE *program_fin; + int eof_flag; +} mawk_parse_state_t; + + +typedef struct mawk_mm_s mawk_mm_t; + +struct mawk_mm_s { + mawk_mm_t *prev, *next; + int size; + char data[1]; /* actual data */ +} ; + + +/* regex lib */ +typedef unsigned char mawk_BV[32]; /* bit vector */ + +typedef struct { + char type; + unsigned char len; /* used for M_STR */ + union { + char *str; /* string */ + mawk_BV *bvp; /* class */ + int jump; + } data; +} mawk_RESTATE; + +/* function callback type: this how execute() can call external C functions */ +typedef mawk_cell_t *libmawk_c_function(mawk_state_t *context, mawk_cell_t * sp, int a_args); + +/* struct for the run time stack */ +typedef struct { + mawk_RESTATE *m; /* save the machine ptr */ + int u; /* save the u_flag */ + char *s; /* save the active string ptr */ + char *ss; /* save the match start -- only used by mawk_REmatch */ +} mawk_RT_STATE; /* run time state */ + 
+struct mawk_state_s { + +#ifdef DEBUG +#define YYDEBUG 1 + int yydebug; /* print parse if on */ + int dump_RE; +#endif + + short posix_space_flag, interactive_flag; + +/* a well known string */ + mawk_string_t null_str; + +/* a useful scratch area */ + union { + mawk_string_t *_split_buff[MAX_SPLIT]; + char _string_buff[MIN_SPRINTF]; + } tempbuff; + + + /* help with casts */ + unsigned long *mpow2; + + mawk_cell_t field[FBANK_SZ + NUM_PFIELDS]; + /* $0, $1 ... $(MAX_SPLIT), NF, RS, RS, CONVFMT, OFMT */ + + /* more fields if needed go here */ + mawk_cell_t *fbank[NUM_FBANK]; /* fbank[0] == field */ + + + /* these are used by the parser, scanner and error messages + from the compile */ + + PFILE *pfile_list, *pfile_list_tail; + + int current_token; + unsigned token_lineno; /* lineno of current token */ + unsigned compile_error_count; + int paren_cnt, brace_cnt; + int print_flag, getline_flag; + short mawk_state; + char *progname; /* for error messages */ + unsigned rt_nr, rt_fnr; /* ditto */ + + /* this can be moved and enlarged by -W sprintf=num */ + char *sprintf_buff; + char *sprintf_limit; + + FILE_NODE *main_input; + mawk_array_t Argv; /* to the user this is ARGV */ + mawk_num_t argi; /* index of next ARGV[argi] to try to open */ + unsigned lineno; + int NR_flag; /* are we tracking NR */ + + CODEBLOCK active_code; + CODEBLOCK *main_code_p, *begin_code_p, *end_code_p; + INST *begin_start, *main_start, *end_start, *end_start_orig; + unsigned begin_size, main_size, end_size; + INST *execution_start; + + int dump_code_flag; /* if on dump internal code */ + int dump_sym_flag; /* if on dump internal symbols */ + + INST *restart_label; /* control flow labels */ + INST *next_label; + mawk_cell_t tc; /*useful temp */ + int scope; + FBLOCK *active_funct; /* when scope is SCOPE_FUNCT */ + JMP *jmp_top; + BC *bc_top; + MC *mc_top; + + mawk_parse_state_t ps; /* current parse state */ + mawk_array_t scripts_loaded; /* a hash indeced by full paths of scripts already loaded */ 
+ mawk_parse_state_t *mawk_parser_stack; /* parse state stack for "include" */ + int pstack_alloced, pstack_used; + + int check_progress; /* flag that indicates call_arg_check() was able to type check some call arguments */ + struct child *child_list; /* dead children are kept on this list */ + unsigned repl_cnt; /* number of global replacements */ + long seed; /* must be >=1 and < 2^31-1 */ + mawk_cell_t cseed; /* argument of last call to srand() */ + mawk_cell_t eval_stack[EVAL_STACK_SIZE]; + mawk_cell_t *sp; + mawk_cell_t *stack_base; /* these can move for deep recursion */ + mawk_cell_t *stack_danger; + int exit_code, final_exit_code, rt_exit_code; +# ifdef HAVE_STRTOD_OVF_BUG + fp_except entry_mask; + fp_except working_mask; +# endif + mawk_escape_t escape_test[ET_END + 1]; + struct mawk_fdump *fdump_list; /* linked list of all user functions */ + FILE_NODE *file_list; + char *shell; /* hardwire to /bin/sh for portability of programs */ + + int max_field; /* maximum field actually created */ +/* a description of how to split based on RS. + If RS is changed, so is rs_shadow */ + SEPARATOR rs_shadow; + /* a splitting mawk_cell_t version of FS */ + mawk_cell_t fs_shadow; + int nf; /* nf holds the true value of NF. 
If nf < 0 , then NF has not been computed, i.e., $0 has not been split */ + HASHNODE *hash_table[HASH_PRIME]; + HASHNODE *save_list; /* when processing user functions, global ids which are replaced by local ids are saved on this list */ + unsigned last_hash; + + /* large block allocator (memory accounting and free-later mechanism) */ + mawk_mm_t *mawk_mm_head; + int mm_used, mm_max; + + /* small block allocator in zmalloc.[ch] (pooling) */ + ZBLOCK *pool[POOLSZ]; /* pool of blocks already free'd, indexed by size (in blocks) */ + unsigned amt_avail; /* how many blocks are unclaimed at the end of ->avail */ + ZBLOCK *avail; /* the chunk we split up for new allocations */ + + RE_NODE *re_list; /* a list of compiled regular expressions */ + REPL_NODE *repl_list; /* here's our old friend linked linear list with move to the front for compilation of replacement CELLs */ + char scan_code[256]; + SPLIT_OV *split_ov_list; + + libmawk_c_function *func_being_called; /* the C function that's being called back from execute() */ + + void *func_userdata; /* during calls to C functions, func_userdata has the value that it had during registration of that function (it's saved and restored) */ + void *ctx_userdata; /* set by the user, never touched by libmawk */ + + int last_token_lineno; /* last token line number to detect source line change for adding debug info */ + mawk_debug_callstack_t *debug_callstack; + + FCALL_REC *resolve_list; + void *lvalp; + mawk_cell_t code_call_id_dummy; + + mawk_cell_t bi_vars[NUM_BI_VAR]; + + + /* regex lib state */ + int REerrno; + mawk_RT_STATE *RE_run_stack_base; + mawk_RT_STATE *RE_run_stack_limit; + mawk_RT_STATE *RE_run_stack_empty; /* Large model DOS segment arithemetic breaks the current stack. 
This hack fixes it without rewriting the whole thing, 5/31/91 */ + mawk_BV **REbv_base, **REbv_limit; + mawk_BV **REbv_next; /* next empty slot in the array */ + int REbv_alloced; + int REprev; + unsigned RElen; + char *RElp; /* ptr to reg exp string */ + + unsigned long runlimit; /* how many instructions to run before returning; 0 means "unlimited" (2^32) */ + + /* should be a bitfield! */ + int debug_symbols; /* add location infoand other debug symbol data to the code */ + int separate_begin; /* if not zero, after running BEGIN blocks, no main block is automaticly executed */ + int suppress_undefined_function_warning; /* if not zero, do not warn about functions undefined */ + int no_program_ok; /* it is ok if there's no program after processing argv[] */ + + int do_exit; /* non-zero if we should exit immediately (added for exiting from the parser) */ + int wants_to_exit; /* non-zero if a script decied to exit but libmawk didn't really stop it (doesn't happen in main.c but happens with libmawk.c) */ + int binary_loaded; /* non-zero if no text parsing is required (binary file has been loaded) */ + + /* hooks */ + const char *(*file_name_rewrite)(const char *orig_name, char *buff, int buff_size, int type); /* called any time the script wants to open a new file (print redirection or getline); return orig_name or buff after filling in a new file name there or another string const (won't be freed); return NULL to deny opening the file */ + mawk_vio_init_t vio_init; + + FILE_NODE *fnode_stdin, *fnode_stdout, *fnode_stderr; + FBLOCK *c_funcs; /* list of c function calls - to be free'd on uninit */ +}; + +#define EXECUTION 1 /* other state is 0 compiling */ + + /* anonymous union */ +#define string_buff MAWK->tempbuff._string_buff +#define split_buff MAWK->tempbuff._split_buff + + +/* prototypes */ + +void mawk_cast1_to_str(mawk_state_t *, mawk_cell_t *); +void mawk_cast1_to_num(mawk_state_t *, mawk_cell_t *); +void mawk_cast2_to_str(mawk_state_t *, mawk_cell_t *); +void 
mawk_cast2_to_num(mawk_state_t *, mawk_cell_t *); +void mawk_cast_to_RE(mawk_state_t *, mawk_cell_t *); +void mawk_cast_for_split(mawk_state_t *, mawk_cell_t *); +void mawk_check_strnum(mawk_state_t *, mawk_cell_t *); +void mawk_cast_to_REPL(mawk_state_t *, mawk_cell_t *); + +#define d_to_i(d) ((int)mawk_d_to_I(d)) + + +int test(mawk_state_t *, mawk_cell_t *); /* test for null non-null */ +mawk_cell_t *repl_cpy(mawk_cell_t *, mawk_cell_t *); +void DB_cell_destroy(mawk_state_t *, mawk_cell_t *); +void overflow(mawk_state_t *, char *, unsigned); +void mawk_rt_overflow(mawk_state_t * MAWK, char *, unsigned); +void mawk_rt_error(mawk_state_t *, const char *, ...); +void mawk_set_errno(mawk_state_t * MAWK, const char *error); + + +void mawk_exit_(mawk_state_t *, int); +#define mawk_exitval(MAWK, x, RETVAL) \ + do { \ + mawk_exit_(MAWK, x); \ + return RETVAL; \ + } while(0); + +#define mawk_exit(MAWK, x) \ + do { \ + mawk_exit_(MAWK, x); \ + return; \ + } while(0); + +void mawk_da(mawk_state_t *, INST *, void *); +char *mawk_str_str(char *, char *, unsigned); +char *mawk_rm_escape(mawk_state_t *, char *); +char *mawk_re_pos_match(mawk_state_t *, char *, PTR, unsigned *); +int mawk_binmode(void); + + +void mawk_bozo(mawk_state_t *, char *); +void mawk_errmsg(mawk_state_t *, int, char *, ...); +void mawk_compile_error(mawk_state_t *, const char *, ...); + +void mawk_execute(mawk_state_t *, INST *, mawk_cell_t *, mawk_cell_t *); +const char *mawk_find_kw_str(int); + +void mawk_overflow(mawk_state_t * MAWK, char *s, unsigned size); +void mawk_bi_vars_init(mawk_state_t * MAWK); +void mawk_bi_funct_init(mawk_state_t * MAWK); +#ifdef MAKW_MEM_PEDANTIC +void mawk_bi_funct_uninit(mawk_state_t *MAWK) +#endif +void mawk_code_init(mawk_state_t *MAWK); +void mawk_parse(mawk_state_t *); + +#ifndef MAWK_NO_FLOAT +# ifdef HAVE_STRTOD_OVF_BUG + double strtod_with_ovf_bug(const char *, char **); +# define strtod strtod_with_ovf_bug +# endif +#endif + +#endif /* MAWK_H */ diff --git 
a/src/libmawk/memory.c b/src/libmawk/memory.c new file mode 100644 index 0000000..628d394 --- /dev/null +++ b/src/libmawk/memory.c @@ -0,0 +1,185 @@ + +/******************************************** +memory.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, 1992 Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#include "mawk.h" +#include "memory.h" + +static mawk_string_t *xnew_STRING(mawk_state_t *, unsigned); + + +static mawk_string_t *xnew_STRING(mawk_state_t *MAWK, unsigned len) +{ + mawk_string_t *sval = (mawk_string_t *) mawk_zmalloc(MAWK, len + STRING_OH); + + sval->len = len; + sval->ref_cnt = 1; + return sval; +} + +/* allocate space for a mawk_string_t */ + +mawk_string_t *mawk_new_STRING0(mawk_state_t *MAWK, unsigned len) +{ + if (len == 0) { + MAWK->null_str.ref_cnt++; + return &MAWK->null_str; + } + else { + mawk_string_t *sval = xnew_STRING(MAWK, len); + sval->str[len] = 0; + return sval; + } +} + +/* convert char* to mawk_string_t* */ + +mawk_string_t *mawk_new_STRING(mawk_state_t *MAWK, const char *s) +{ + if (s[0] == 0) { + MAWK->null_str.ref_cnt++; + return &MAWK->null_str; + } + else { + mawk_string_t *sval = xnew_STRING(MAWK, strlen(s)); + strcpy(sval->str, s); + return sval; + } +} + + +#ifdef DEBUG + +void DB_free_STRING(mawk_state_t *MAWK, register mawk_string_t *sval) +{ + if (--sval->ref_cnt == 0) + mawk_zfree(MAWK, sval, sval->len + STRING_OH); +} + +#endif + + +/************************************************************************** +large block allocation: collect all allocation in a double linked list in +MAWK so they can be easily free'd without any leak at the end of script +execution. 
Also do memory accounting and don't allocate over preset limit. +***************************************************************************/ + +#define OVERHEAD ((char *)&(r->data) - (char *)(r)) +static void *mawk_mm_link(mawk_state_t *MAWK, mawk_mm_t *r) +{ + r->next = MAWK->mawk_mm_head; + r->prev = NULL; + if (r->next != NULL) + r->next->prev = r; + MAWK->mawk_mm_head = r; + return &r->data; +} + +static void mawk_mm_unlink(mawk_state_t *MAWK, mawk_mm_t *r) +{ + if (r->prev != NULL) + r->prev->next = r->next; + else + MAWK->mawk_mm_head = r->next; + if (r->next != NULL) + r->next->prev = r->prev; +} + +/* store a pointer in the mawk_mm_head list */ +void *mawk_malloc(mawk_state_t *MAWK, int size) +{ + mawk_mm_t *r; + + if ((MAWK->mm_max > 0) && (MAWK->mm_used + size > MAWK->mm_max)) + return NULL; + + r = malloc(OVERHEAD + size); + r->size = OVERHEAD + size; + MAWK->mm_used += r->size; + return mawk_mm_link(MAWK, r); +} + +void *mawk_realloc(mawk_state_t *MAWK, void *ptr, int size) +{ + mawk_mm_t *r; + + /* emulate mawk_malloc() for simpler implementation of growing buff */ + if (ptr == NULL) + return mawk_malloc(MAWK, size); + + r = (mawk_mm_t *)((char *)ptr - OVERHEAD); + + mawk_mm_unlink(MAWK, r); + + if ((MAWK->mm_max > 0) && (MAWK->mm_used - r->size + size > MAWK->mm_max)) + return NULL; + + MAWK->mm_used -= r->size; + r = realloc(r, OVERHEAD + size); + if (r == NULL) + return NULL; + r->size = OVERHEAD + size; + MAWK->mm_used += r->size; + return mawk_mm_link(MAWK, r); +} + +void mawk_free(mawk_state_t *MAWK, void *ptr) +{ + mawk_mm_t *r = (mawk_mm_t *)((char *)ptr - OVERHEAD); + mawk_mm_unlink(MAWK, r); + MAWK->mm_used -= r->size; + free(r); +} + +void mawk_free_all(mawk_state_t *MAWK) +{ + mawk_mm_t *r, *n; + for(r = MAWK->mawk_mm_head; r != NULL; r = n) { + n = r->next; + free(r); + } + MAWK->mawk_mm_head = NULL; + MAWK->mm_used = 0; +} + +char *mawk_strdup(mawk_state_t *MAWK, const char *s) +{ + int l; + char *r; + + if (s == NULL) + return NULL; + + 
l = strlen(s); + r = mawk_malloc(MAWK, l+1); + memcpy(r, s, l+1); + return r; +} + +char *mawk_strdup_(const char *s) +{ + int l; + char *r; + + if (s == NULL) + return NULL; + + l = strlen(s); + r = malloc(l+1); + memcpy(r, s, l+1); + return r; +} + diff --git a/src/libmawk/memory.h b/src/libmawk/memory.h new file mode 100644 index 0000000..eb8cd6b --- /dev/null +++ b/src/libmawk/memory.h @@ -0,0 +1,64 @@ + +/******************************************** +memory.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#ifndef MEMORY_H +#define MEMORY_H + +#include + + +mawk_string_t *mawk_new_STRING(mawk_state_t *, const char *); +mawk_string_t *mawk_new_STRING0(mawk_state_t *, unsigned); + +#ifdef DEBUG +void DB_free_STRING(mawk_state_t *, mawk_string_t *); + +#define free_STRING(s) DB_free_STRING(MAWK, s) + +#else + +#define free_STRING(sval) if ( -- (sval)->ref_cnt == 0 )\ + mawk_zfree(MAWK, sval, (sval)->len+STRING_OH) ; else +#endif + +#ifdef DEBUG +void DB_mawk_eval_overflow(mawk_state_t * MAWK); + +#define inc_sp() if( ++sp == MAWK->eval_stack+EVAL_STACK_SIZE )\ + DB_mawk_eval_overflow(MAWK) +#define inc_mawksp() if( ++(MAWK->sp) == MAWK->eval_stack+EVAL_STACK_SIZE )\ + DB_mawk_eval_overflow(MAWK) +#else + +/* If things are working, the eval stack should not mawk_overflow */ + +#define inc_sp() sp++ +#define inc_mawksp() (MAWK->sp)++ +#endif + + +/* large block allocation */ +void *mawk_malloc(mawk_state_t *MAWK, int size); +void *mawk_realloc(mawk_state_t *MAWK, void *ptr, int size); +void mawk_free(mawk_state_t *MAWK, void *ptr); +void mawk_free_all(mawk_state_t *MAWK); +char *mawk_strdup(mawk_state_t *MAWK, const char 
*s);
+
+/* plain malloc() wrappers */
+char *mawk_strdup_(const char *s);
+
+
+#endif /* MEMORY_H */
diff --git a/src/libmawk/missing.c b/src/libmawk/missing.c
new file mode 100644
index 0000000..8524e17
--- /dev/null
+++ b/src/libmawk/missing.c
@@ -0,0 +1,166 @@
+
+/* missing.c: replacements for libc functions that some platforms lack;
+   each group is compiled only if its NO_* configuration macro is defined */
+
+#include "nstd.h"
+
+#ifdef NO_STRCHR
+/* Return a pointer to the first occurrence of c in s (to the terminating
+   0 if c is 0) or NULL if c does not occur. */
+char *strchr(char *s, int c)
+{
+	if (c == 0)
+		return s + strlen(s);
+
+	while (*s) {
+		if (*s == (char) c) /* compare as char, like the libc version */
+			return s;
+		s++;
+	}
+	return (char *) 0;
+}
+
+/* Return a pointer to the last occurrence of c in s (to the terminating
+   0 if c is 0) or NULL if c does not occur. */
+char *strrchr(char *s, int c)
+{
+	char *ret = (char *) 0;
+
+	if (c == 0)
+		return s + strlen(s);
+
+	while (*s) {
+		if (*s == (char) c)
+			ret = s;
+		s++;
+	}
+	return ret;
+}
+#endif /* NO_STRCHR */
+
+#ifdef NO_STRERROR
+extern int sys_nerr;
+extern char *sys_errlist[];
+
+/* Return the sys_errlist[] message for errno value n, or "" if n is not
+   a positive index below sys_nerr. */
+char *strerror(int n)
+{
+	return (n > 0 && n < sys_nerr) ? sys_errlist[n] : "";
+}
+#endif
+
+
+#ifdef NO_MEMCPY
+/* Copy n bytes from s to t (the areas must not overlap); returns t.
+   Parameter types match the prototype in nstd.h. */
+PTR memcpy(PTR t, const void *s, size_t n)
+{
+	char *tt = t;
+	const char *ss = s;
+
+	while (n > 0) {
+		n--;
+		*tt++ = *ss++;
+	}
+	return t;
+}
+
+/* Compare the first n bytes of t and s; returns -1, 0 or 1. Bytes are
+   compared as unsigned char, as ISO C requires, so bytes >= 0x80 sort
+   after ASCII even where plain char is signed. */
+int memcmp(const void *t, const void *s, size_t n)
+{
+	const unsigned char *tt = t;
+	const unsigned char *ss = s;
+
+	while (n > 0) {
+		if (*tt < *ss)
+			return -1;
+		if (*tt > *ss)
+			return 1;
+		tt++;
+		ss++;
+		n--;
+	}
+	return 0;
+}
+
+/* Fill the first n bytes of t with the byte value c; returns t. */
+PTR memset(PTR t, int c, size_t n)
+{
+	char *tt = (char *) t;
+
+	while (n > 0) {
+		n--;
+		*tt++ = c;
+	}
+	return t;
+}
+#endif /* NO_MEMCPY */
+
+#ifndef MAWK_NO_FLOAT
+#ifdef NO_STRTOD
+
+/* don't use this unless you really don't have strtod() because
+	(1) it's probably slower than your real strtod()
+	(2) atof() may call the real strtod()
+
+   The value itself comes from atof(s); the scan below only locates the
+   end of the number so that *endptr can be set. When endptr is NULL the
+   scan is skipped. If no digits are found, *endptr is set to s and
+   MAWK_NUM_ZERO is returned.
+*/
+
+double strtod(const char *s, char **endptr)
+{
+	register unsigned char *p;
+	int flag; /* number of digits seen in the part being scanned */
+	double atof();
+
+	if (endptr) {
+		p = (unsigned char *) s;
+
+		flag = 0;
+		while (*p == ' ' || *p == '\t')
+			p++;
+		if (*p == '-' || *p == '+')
+			p++;
+		while (scan_code[*p] == SC_DIGIT) {
+			flag++;
+			p++;
+		}
+		if (*p == '.') {
+			p++;
+			while (scan_code[*p] == SC_DIGIT) {
+				flag++;
+				p++;
+			}
+		}
+		/* done with number part */
+		if (flag == 0) { /* no number part */
+			*endptr = (char *) s; /* cast drops const, as for p below */
+			return MAWK_NUM_ZERO;
+		}
+		else
+			*endptr = (char *) p;
+
+		/* now look for exponent */
+		if (*p == 'e' || *p == 'E') {
+			flag = 0;
+			p++;
+			if (*p == '-' || *p == '+')
+				p++;
+			while (scan_code[*p] == SC_DIGIT) {
+				flag++;
+				p++;
+			}
+			if (flag)
+				*endptr = (char *) p;
+		}
+	}
+	return atof(s);
+}
+#endif /* no strtod() */
+
+#endif /* MAWK_NO_FLOAT */
diff --git a/src/libmawk/nstd.h b/src/libmawk/nstd.h
new file mode 100644
index 0000000..e39e1c3
--- /dev/null
+++ b/src/libmawk/nstd.h
@@ -0,0 +1,53 @@
+/* Never Standard.h
+
+   This has all the prototypes that are supposed to
+   be in a standard place but never are, and when they are
+   the standard place isn't standard
+*/
+
+#ifndef NSTD_H
+#define NSTD_H 1
+
+
+/* types */
+typedef void *PTR;
+
+#include "conf.h"
+
+/* stdlib.h */
+#ifndef MAWK_NO_FLOAT
+double strtod(const char *, char **);
+#endif
+
+#ifdef MAWK_BROKEN_STDLIB
+void free(void *);
+PTR malloc(size_t);
+PTR realloc(void *, size_t);
+void exit(int);
+#endif
+
+char *getenv(const char *);
+
+/* string.h */
+
+int memcmp(const void *, const void *, size_t);
+PTR memcpy(void *, const void *, size_t);
+PTR memset(void *, int, size_t);
+char *strcpy(char *, const char *);
+size_t strlen(const char *);
+char *strerror(int);
+
+
+#ifdef NO_ERRNO_H
+extern int errno;
+#else
+#include <errno.h>
+#endif
+
+/* math.h */
+/* if have to diddle with errno to get errors from the math library */
+#ifndef STDC_MATHERR
+#define STDC_MATHERR (FPE_TRAPS_ON && NO_MATHERR)
+#endif
+
+#endif /* NSTD_H */
diff --git a/src/libmawk/num.h b/src/libmawk/num.h
new file mode 100644
index 0000000..8161f2e
--- /dev/null
+++ b/src/libmawk/num.h
@@ -0,0 +1,25 @@
+#ifndef NUM_H
+#define NUM_H
+
+#include "conf.h"
+#include "sizes.h"
+
+typedef enum { /* fixed numbers are important because of the file format */
+	MAWK_NUM_ID_DOUBLE = 'd',
+	MAWK_NUM_ID_INT = 'i'
+} mawk_num_id_t;
+
+extern const mawk_num_id_t mawk_num_id;
+
+#ifdef MAWK_NO_FLOAT
+# include
+#else
+# include
+# 
include
+#endif
+
+Int mawk_d_to_I(mawk_num_t);
+const char *mawk_num_print_spec(mawk_num_t d);
+
+
+#endif /* NUM_H */
diff --git a/src/libmawk/num_double.c b/src/libmawk/num_double.c
new file mode 100644
index 0000000..23e2b51
--- /dev/null
+++ b/src/libmawk/num_double.c
@@ -0,0 +1,79 @@
+/********************************************
+libmawk (C) 2009-2014, Tibor 'Igor2' Palinkas;
+
+This is a source file for libmawk, an implementation of
+the AWK programming language, fork of mawk.
+
+Libmawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/
+
+#include "mawk.h"
+#include "num.h"
+#include "math_wrap.h"
+
+const mawk_num_id_t mawk_num_id = MAWK_NUM_ID_DOUBLE;
+
+/* convert a number to Int (this is not as simple as a
+   mawk_cast because the results are undefined if it won't fit).
+   Truncate large values to +Max_Int or -Max_Int
+   Send nans to -Max_Int
+*/
+
+Int mawk_d_to_I(mawk_num_t d)
+{
+	if (d >= Max_Int)
+		return Max_Int;
+	if (d > -Max_Int)
+		return (Int) d;
+	return -Max_Int;
+}
+
+/* Return "nan" if d is the NaN value, NULL otherwise. */
+const char *mawk_num_print_spec(mawk_num_t d)
+{
+	if (P_isnan(d))
+		return "nan";
+	return NULL;
+}
+
+/* Remainder of x / y: x minus y times the integer part of x / y (as
+   delivered by modf()). Returns NaN when y is 0 or when the PM_ERROR
+   branch is taken (presumably a math error caught by the PM_* macros -
+   confirm in math_wrap.h). */
+double P_fmod(double x, double y)
+{
+	double modf();
+	double ipart;
+
+	if (y == 0)
+		return P_nan();
+	PM_BEGIN
+	modf(x / y, &ipart);
+	PM_ERROR
+	return P_nan();
+	PM_END
+	return x - ipart * y;
+}
+
+/* x raised to the power y. Returns NaN if either operand is NaN, if a
+   negative x is combined with a non-integer y, or if the PM_ERROR branch
+   is taken (presumably a math error caught by the PM_* macros - confirm
+   in math_wrap.h). */
+double mawk_num_pow(double x, double y)
+{
+	double r;
+
+	if (P_isnan(x) || P_isnan(y))
+		return P_nan();
+	/* floor() tests integrality for any sign and magnitude of y; the old
+	   (int)(y+0.5) test rejected negative integers such as -1 */
+	if ((x < 0.0) && (y != floor(y)))
+		return P_nan();
+	PM_BEGIN
+	r = pow(x, y);
+	PM_ERROR
+	return P_nan();
+	PM_END
+	return r;
+}
diff --git a/src/libmawk/num_double.h b/src/libmawk/num_double.h
new file mode 100644
index 0000000..2d92d4c
--- /dev/null
+++ b/src/libmawk/num_double.h
@@ -0,0 +1,30 @@
+/* number type */
+typedef double mawk_num_t;
+
+/* constant format differ for floating point and int */
+#define MAWK_NUM_ZERO 0.0
+#define MAWK_NUM_ONE 1.0
+
+/* default printf format */
+#define NUM_FMT "%g"
+
+/* format for the disassembler */
+#define NUM_FMT_DA "%.6g"
+
+#define mawk_num_sqrt sqrt
+#define mawk_num_int(d) ((d) >= MAWK_NUM_ZERO ? floor(d) : ceil(d))
+
+#ifdef MAWK_HAVE_SAFE_NAN
+#define P_isnan(x) isnan(x)
+#define P_nan() nan("nan")
+#else
+#define NUM_NAN HUGE_VAL
+#define P_isnan(x) ((x) == NUM_NAN)
+#define P_nan() (NUM_NAN)
+#endif
+
+double mawk_num_pow(double x, double y);
+double P_fmod(double x, double y);
+
+#define strtonum(nptr, endptr) strtod(nptr, endptr)
+
diff --git a/src/libmawk/num_int.c b/src/libmawk/num_int.c
new file mode 100644
index 0000000..b5f7bba
--- /dev/null
+++ b/src/libmawk/num_int.c
@@ -0,0 +1,87 @@
+/********************************************
+libmawk (C) 2009-2014, Tibor 'Igor2' Palinkas;
+
+This is a source file for libmawk, an implementation of
+the AWK programming language, fork of mawk.
+
+Libmawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/
+
+#include "mawk.h"
+#include "num.h"
+
+const mawk_num_id_t mawk_num_id = MAWK_NUM_ID_INT;
+
+/* convert a number to Int; Int is int */
+
+Int mawk_d_to_I(mawk_num_t d)
+{
+	return (Int) d;
+}
+
+/* Return "nan" if d is the NaN marker value, NULL otherwise. */
+const char *mawk_num_print_spec(mawk_num_t d)
+{
+	if (P_isnan(d))
+		return "nan";
+	return NULL;
+}
+
+/* x raised to the integer power y, by repeated squaring. NaN operands
+   give NaN. A negative y yields the truncated value of 1 / x^-y: 1 for
+   x == 1, +-1 for x == -1, NaN for x == 0 (division by zero) and 0 for
+   any other x. Overflow is not detected. */
+mawk_num_t mawk_num_pow(mawk_num_t x, mawk_num_t y)
+{
+	mawk_num_t a = 1;
+
+	if (P_isnan(x) || P_isnan(y))
+		return P_nan();
+
+	if (y < 0) {
+		if (x == 1)
+			return 1;
+		if (x == -1)
+			return (y % 2) ? -1 : 1;
+		if (x == 0)
+			return P_nan();
+		return 0;
+	}
+
+	while (y > 0) {
+		if (y & 1)
+			a = a * x;
+		y >>= 1;
+		if (y > 0) /* skip the final squaring: it could overflow needlessly */
+			x = x * x;
+	}
+	return a;
+}
+
+
+#define abs_macro(x) ((x) < 0 ? -(x) : (x))
+
+/* Integer square root approximation by the Babylonian (Newton) method.
+   Returns x unchanged for NaN and 0, NaN for negative x. Iteration stops
+   as soon as two successive guesses differ by at most 1, so the result
+   is not guaranteed to be the exact floor of the root (8 yields 3). */
+mawk_num_t mawk_num_sqrt(mawk_num_t x)
+{
+	mawk_num_t old_guess, guess;
+	if (P_isnan(x) || (x == 0))
+		return x;
+	if (x < 0)
+		return P_nan();
+
+/* Babylonian method */
+	guess = 1;
+	old_guess = -1;
+	while(abs_macro(old_guess - guess) > 1) {
+		old_guess = guess;
+		/* add as unsigned: guess + x/guess can exceed the int range (first
+		   step of a very large x) while the halved sum always fits */
+		guess = (mawk_num_t) (((unsigned int) guess + (unsigned int) (x / guess)) / 2);
+	}
+	return guess;
+}
diff --git a/src/libmawk/num_int.h b/src/libmawk/num_int.h
new file mode 100644
index 0000000..74b236c
--- /dev/null
+++ b/src/libmawk/num_int.h
@@ -0,0 +1,27 @@
+/* number type */
+typedef int mawk_num_t;
+
+/* constant format differ for floating point and int */
+#define MAWK_NUM_ZERO 0
+#define MAWK_NUM_ONE 1
+#define NUM_NAN 0x80000000
+
+/* default printf format */
+#define NUM_FMT "%d"
+
+/* format for the disassembler */
+#define NUM_FMT_DA "%d"
+
+mawk_num_t mawk_num_pow(mawk_num_t x, mawk_num_t y);
+mawk_num_t mawk_num_sqrt(mawk_num_t x);
+#define mawk_num_int(d) (d)
+
+/* NaN is represented in-band by NUM_NAN; P_nan() takes no argument, the
+   same as in num_double.h, so shared code can call it the same way */
+#define P_isnan(x) ((x) == NUM_NAN)
+#define P_nan() (NUM_NAN)
+
+#define P_isnan_manual(x) P_isnan(x)
+
+#define strtonum(nptr, endptr) strtol(nptr, endptr, 10)
+
diff --git a/src/libmawk/parse.c b/src/libmawk/parse.c
new file mode 100644
index 0000000..d0280af
--- /dev/null
+++ b/src/libmawk/parse.c
@@ -0,0 +1,3748 @@
+/* A Bison parser, made by GNU Bison 3.0.2. */
+
+/* Bison implementation for Yacc-like parsers in C
+
+   Copyright (C) 1984, 1989-1990, 2000-2013 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* C LALR(1) parser skeleton written by Richard Stallman, by + simplifying the original so-called "semantic" parser. */ + +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. This should be done even for local + variables, as they might otherwise be expanded by user macros. + There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ + +/* Identify Bison output. */ +#define YYBISON 1 + +/* Bison version. */ +#define YYBISON_VERSION "3.0.2" + +/* Skeleton name. */ +#define YYSKELETON_NAME "yacc.c" + +/* Pure parsers. */ +#define YYPURE 1 + +/* Push parsers. */ +#define YYPUSH 0 + +/* Pull parsers. */ +#define YYPULL 1 + + +/* Substitute the variable and function names. */ +#define yyparse Mawk_parse +#define yylex Mawk_lex +#define yyerror Mawk_error +#define yydebug Mawk_debug +#define yynerrs Mawk_nerrs + + +/* Copy the first part of user declarations. 
*/ +#line 86 "parse.y" /* yacc.c:339 */ + +#include +#include "mawk.h" +#include "types.h" +#include "symtype.h" +#include "code.h" +#include "memory.h" +#include "bi_funct.h" +#include "bi_vars.h" +#include "jmp.h" +#include "field.h" +#include "files.h" +#include "scan.h" +#include "zmalloc.h" +#include "f2d.h" + + +#define YYMAXDEPTH 200 + +void mawk_eat_nl(mawk_state_t * MAWK, YYSTYPE *lvalp); +static void resize_fblock(mawk_state_t *, FBLOCK *); +static void switch_code_to_main(mawk_state_t *); +static void mawk_code_array(mawk_state_t *, SYMTAB *); +static void mawk_code_call_id(mawk_state_t *, CA_REC *, SYMTAB *); +static void field_A2I(mawk_state_t *MAWK); +static void check_var(mawk_state_t *, SYMTAB *); +static void check_array(mawk_state_t *, SYMTAB *); +static void RE_as_arg(mawk_state_t *MAWK); + +void mawk_parser_include(mawk_state_t *MAWK, void *str); + +#define mawk_code_address(x) \ +do { \ + if (is_local(x)) \ + mawk_code2op(MAWK, L_PUSHA, (x)->offset) ;\ + else \ + code2(MAWK, _PUSHA, (x)->stval.cp); \ +} while(0) + +#define CDP(x) (mawk_code_base+(x)) +/* WARNING: These CDP() calculations become invalid after calls + that might change code_base. Which are: code2(), mawk_code2op(), + code_jmp() and code_pop(). +*/ + +/* this nonsense caters to MSDOS large model */ +#define CODE_FE_PUSHA() mawk_code_ptr->ptr = (PTR) 0 ; code1(FE_PUSHA) + + +#line 122 "y.tab.c" /* yacc.c:339 */ + +# ifndef YY_NULLPTR +# if defined __cplusplus && 201103L <= __cplusplus +# define YY_NULLPTR nullptr +# else +# define YY_NULLPTR 0 +# endif +# endif + +/* Enabling verbose error messages. */ +#ifdef YYERROR_VERBOSE +# undef YYERROR_VERBOSE +# define YYERROR_VERBOSE 1 +#else +# define YYERROR_VERBOSE 0 +#endif + +/* In a future release of Bison, this section will be replaced + by #include "y.tab.h". */ +#ifndef YY_MAWK_Y_TAB_H_INCLUDED +# define YY_MAWK_Y_TAB_H_INCLUDED +/* Debug traces. 
*/ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif +#if YYDEBUG +extern int Mawk_debug; +#endif + +/* Token type. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { + UNEXPECTED = 258, + BAD_DECIMAL = 259, + NL = 260, + SEMI_COLON = 261, + LBRACE = 262, + RBRACE = 263, + LBOX = 264, + RBOX = 265, + COMMA = 266, + IO_OUT = 267, + ASSIGN = 268, + ADD_ASG = 269, + SUB_ASG = 270, + MUL_ASG = 271, + DIV_ASG = 272, + MOD_ASG = 273, + POW_ASG = 274, + QMARK = 275, + COLON = 276, + OR = 277, + AND = 278, + IN = 279, + MATCH = 280, + EQ = 281, + NEQ = 282, + LT = 283, + LTE = 284, + GT = 285, + GTE = 286, + CAT = 287, + GETLINE = 288, + PLUS = 289, + MINUS = 290, + MUL = 291, + DIV = 292, + MOD = 293, + NOT = 294, + UMINUS = 295, + IO_IN = 296, + PIPE = 297, + POW = 298, + INC_or_DEC = 299, + DOLLAR = 300, + FIELD = 301, + LPAREN = 302, + RPAREN = 303, + DOUBLE = 304, + STRING_ = 305, + RE = 306, + ID = 307, + D_ID = 308, + FUNCT_ID = 309, + C_FUNCT_ID = 310, + BUILTIN = 311, + LENGTH = 312, + PRINT = 313, + PRINTF = 314, + SPLIT = 315, + MATCH_FUNC = 316, + SUB = 317, + GSUB = 318, + DO = 319, + WHILE = 320, + FOR = 321, + BREAK = 322, + CONTINUE = 323, + IF = 324, + ELSE = 325, + DELETE = 326, + BEGIN = 327, + END = 328, + EXIT = 329, + NEXT = 330, + RETURN = 331, + FUNCTION = 332, + INCLUDE = 333 + }; +#endif +/* Tokens. 
*/ +#define UNEXPECTED 258 +#define BAD_DECIMAL 259 +#define NL 260 +#define SEMI_COLON 261 +#define LBRACE 262 +#define RBRACE 263 +#define LBOX 264 +#define RBOX 265 +#define COMMA 266 +#define IO_OUT 267 +#define ASSIGN 268 +#define ADD_ASG 269 +#define SUB_ASG 270 +#define MUL_ASG 271 +#define DIV_ASG 272 +#define MOD_ASG 273 +#define POW_ASG 274 +#define QMARK 275 +#define COLON 276 +#define OR 277 +#define AND 278 +#define IN 279 +#define MATCH 280 +#define EQ 281 +#define NEQ 282 +#define LT 283 +#define LTE 284 +#define GT 285 +#define GTE 286 +#define CAT 287 +#define GETLINE 288 +#define PLUS 289 +#define MINUS 290 +#define MUL 291 +#define DIV 292 +#define MOD 293 +#define NOT 294 +#define UMINUS 295 +#define IO_IN 296 +#define PIPE 297 +#define POW 298 +#define INC_or_DEC 299 +#define DOLLAR 300 +#define FIELD 301 +#define LPAREN 302 +#define RPAREN 303 +#define DOUBLE 304 +#define STRING_ 305 +#define RE 306 +#define ID 307 +#define D_ID 308 +#define FUNCT_ID 309 +#define C_FUNCT_ID 310 +#define BUILTIN 311 +#define LENGTH 312 +#define PRINT 313 +#define PRINTF 314 +#define SPLIT 315 +#define MATCH_FUNC 316 +#define SUB 317 +#define GSUB 318 +#define DO 319 +#define WHILE 320 +#define FOR 321 +#define BREAK 322 +#define CONTINUE 323 +#define IF 324 +#define ELSE 325 +#define DELETE 326 +#define BEGIN 327 +#define END 328 +#define EXIT 329 +#define NEXT 330 +#define RETURN 331 +#define FUNCTION 332 +#define INCLUDE 333 + +/* Value type. */ +#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED +typedef union YYSTYPE YYSTYPE; +union YYSTYPE +{ +#line 136 "parse.y" /* yacc.c:355 */ + +mawk_cell_t *cp ; +SYMTAB *stp ; +int start ; /* code starting address as offset from code_base */ +PF_CP fp ; /* ptr to a (print/printf) or (sub/gsub) function */ +const BI_REC *bip ; /* ptr to info about a builtin */ +FBLOCK *fbp ; /* ptr to a function block */ +ARG2_REC *arg2p ; +CA_REC *ca_p ; +int ival ; +PTR ptr ; + +#line 331 "y.tab.c" /* yacc.c:355 */ +}; +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 +#endif + + + +int Mawk_parse (mawk_state_t *MAWK); + +#endif /* !YY_MAWK_Y_TAB_H_INCLUDED */ + +/* Copy the second part of user declarations. */ + +#line 345 "y.tab.c" /* yacc.c:358 */ + +#ifdef short +# undef short +#endif + +#ifdef YYTYPE_UINT8 +typedef YYTYPE_UINT8 yytype_uint8; +#else +typedef unsigned char yytype_uint8; +#endif + +#ifdef YYTYPE_INT8 +typedef YYTYPE_INT8 yytype_int8; +#else +typedef signed char yytype_int8; +#endif + +#ifdef YYTYPE_UINT16 +typedef YYTYPE_UINT16 yytype_uint16; +#else +typedef unsigned short int yytype_uint16; +#endif + +#ifdef YYTYPE_INT16 +typedef YYTYPE_INT16 yytype_int16; +#else +typedef short int yytype_int16; +#endif + +#ifndef YYSIZE_T +# ifdef __SIZE_TYPE__ +# define YYSIZE_T __SIZE_TYPE__ +# elif defined size_t +# define YYSIZE_T size_t +# elif ! 
defined YYSIZE_T +# include /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# else +# define YYSIZE_T unsigned int +# endif +#endif + +#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) + +#ifndef YY_ +# if defined YYENABLE_NLS && YYENABLE_NLS +# if ENABLE_NLS +# include /* INFRINGES ON USER NAME SPACE */ +# define YY_(Msgid) dgettext ("bison-runtime", Msgid) +# endif +# endif +# ifndef YY_ +# define YY_(Msgid) Msgid +# endif +#endif + +#ifndef YY_ATTRIBUTE +# if (defined __GNUC__ \ + && (2 < __GNUC__ || (__GNUC__ == 2 && 96 <= __GNUC_MINOR__))) \ + || defined __SUNPRO_C && 0x5110 <= __SUNPRO_C +# define YY_ATTRIBUTE(Spec) __attribute__(Spec) +# else +# define YY_ATTRIBUTE(Spec) /* empty */ +# endif +#endif + +#ifndef YY_ATTRIBUTE_PURE +# define YY_ATTRIBUTE_PURE YY_ATTRIBUTE ((__pure__)) +#endif + +#ifndef YY_ATTRIBUTE_UNUSED +# define YY_ATTRIBUTE_UNUSED YY_ATTRIBUTE ((__unused__)) +#endif + +#if !defined _Noreturn \ + && (!defined __STDC_VERSION__ || __STDC_VERSION__ < 201112) +# if defined _MSC_VER && 1200 <= _MSC_VER +# define _Noreturn __declspec (noreturn) +# else +# define _Noreturn YY_ATTRIBUTE ((__noreturn__)) +# endif +#endif + +/* Suppress unused-variable warnings by "using" E. */ +#if ! defined lint || defined __GNUC__ +# define YYUSE(E) ((void) (E)) +#else +# define YYUSE(E) /* empty */ +#endif + +#if defined __GNUC__ && 407 <= __GNUC__ * 100 + __GNUC_MINOR__ +/* Suppress an incorrect diagnostic about yylval being uninitialized. 
*/ +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"")\ + _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +# define YY_IGNORE_MAYBE_UNINITIALIZED_END \ + _Pragma ("GCC diagnostic pop") +#else +# define YY_INITIAL_VALUE(Value) Value +#endif +#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_END +#endif +#ifndef YY_INITIAL_VALUE +# define YY_INITIAL_VALUE(Value) /* Nothing. */ +#endif + + +#if ! defined yyoverflow || YYERROR_VERBOSE + +/* The parser invokes alloca or malloc; define the necessary symbols. */ + +# ifdef YYSTACK_USE_ALLOCA +# if YYSTACK_USE_ALLOCA +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# elif defined __BUILTIN_VA_ARG_INCR +# include /* INFRINGES ON USER NAME SPACE */ +# elif defined _AIX +# define YYSTACK_ALLOC __alloca +# elif defined _MSC_VER +# include /* INFRINGES ON USER NAME SPACE */ +# define alloca _alloca +# else +# define YYSTACK_ALLOC alloca +# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS +# include /* INFRINGES ON USER NAME SPACE */ + /* Use EXIT_SUCCESS as a witness for stdlib.h. */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's 'empty if-body' warning. */ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) +# ifndef YYSTACK_ALLOC_MAXIMUM + /* The OS might guarantee only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + invoke alloca (N) if N exceeds 4096. Use a slightly smaller number + to allow for a few compiler-allocated temporary stack slots. 
*/ +# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ +# endif +# else +# define YYSTACK_ALLOC YYMALLOC +# define YYSTACK_FREE YYFREE +# ifndef YYSTACK_ALLOC_MAXIMUM +# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM +# endif +# if (defined __cplusplus && ! defined EXIT_SUCCESS \ + && ! ((defined YYMALLOC || defined malloc) \ + && (defined YYFREE || defined free))) +# include /* INFRINGES ON USER NAME SPACE */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# ifndef YYMALLOC +# define YYMALLOC malloc +# if ! defined malloc && ! defined EXIT_SUCCESS +void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# ifndef YYFREE +# define YYFREE free +# if ! defined free && ! defined EXIT_SUCCESS +void free (void *); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# endif +#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ + + +#if (! defined yyoverflow \ + && (! defined __cplusplus \ + || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. */ +union yyalloc +{ + yytype_int16 yyss_alloc; + YYSTYPE yyvs_alloc; +}; + +/* The size of the maximum gap between one aligned stack and the next. */ +# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) + +/* The size of an array large to enough to hold all stacks, each with + N elements. */ +# define YYSTACK_BYTES(N) \ + ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \ + + YYSTACK_GAP_MAXIMUM) + +# define YYCOPY_NEEDED 1 + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack. Advance YYPTR to a properly aligned location for the next + stack. 
*/ +# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ + do \ + { \ + YYSIZE_T yynewbytes; \ + YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ + Stack = &yyptr->Stack_alloc; \ + yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / sizeof (*yyptr); \ + } \ + while (0) + +#endif + +#if defined YYCOPY_NEEDED && YYCOPY_NEEDED +/* Copy COUNT objects from SRC to DST. The source and destination do + not overlap. */ +# ifndef YYCOPY +# if defined __GNUC__ && 1 < __GNUC__ +# define YYCOPY(Dst, Src, Count) \ + __builtin_memcpy (Dst, Src, (Count) * sizeof (*(Src))) +# else +# define YYCOPY(Dst, Src, Count) \ + do \ + { \ + YYSIZE_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (Dst)[yyi] = (Src)[yyi]; \ + } \ + while (0) +# endif +# endif +#endif /* !YYCOPY_NEEDED */ + +/* YYFINAL -- State number of the termination state. */ +#define YYFINAL 103 +/* YYLAST -- Last index in YYTABLE. */ +#define YYLAST 1147 + +/* YYNTOKENS -- Number of terminals. */ +#define YYNTOKENS 79 +/* YYNNTS -- Number of nonterminals. */ +#define YYNNTS 60 +/* YYNRULES -- Number of rules. */ +#define YYNRULES 186 +/* YYNSTATES -- Number of states. */ +#define YYNSTATES 360 + +/* YYTRANSLATE[YYX] -- Symbol number corresponding to YYX as returned + by yylex, with out-of-bounds checking. */ +#define YYUNDEFTOK 2 +#define YYMAXUTOK 333 + +#define YYTRANSLATE(YYX) \ + ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) + +/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM + as returned by yylex, without out-of-bounds checking. 
*/ +static const yytype_uint8 yytranslate[] = +{ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78 +}; + +#if YYDEBUG + /* YYRLINE[YYN] -- Source line where rule number YYN was defined. 
*/ +static const yytype_uint16 yyrline[] = +{ + 0, 213, 213, 214, 217, 218, 219, 222, 227, 229, + 232, 231, 238, 237, 244, 243, 251, 267, 250, 280, + 282, 288, 289, 295, 296, 300, 301, 303, 305, 311, + 314, 317, 321, 329, 329, 332, 333, 334, 335, 336, + 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, + 347, 348, 349, 350, 351, 352, 354, 383, 382, 390, + 389, 396, 397, 396, 402, 403, 407, 409, 411, 419, + 423, 427, 428, 429, 430, 431, 432, 433, 435, 437, + 439, 442, 450, 455, 462, 466, 473, 482, 483, 486, + 488, 493, 504, 514, 517, 526, 527, 530, 531, 535, + 539, 544, 548, 549, 556, 561, 565, 569, 577, 582, + 588, 608, 634, 658, 659, 663, 664, 681, 685, 698, + 703, 716, 729, 742, 754, 771, 779, 790, 804, 821, + 823, 832, 846, 848, 852, 856, 857, 858, 859, 860, + 861, 862, 868, 872, 879, 881, 905, 912, 935, 938, + 941, 944, 949, 956, 962, 967, 972, 979, 983, 983, + 983, 985, 989, 997, 1016, 1017, 1021, 1026, 1034, 1042, + 1061, 1084, 1091, 1092, 1095, 1101, 1114, 1126, 1138, 1147, + 1149, 1164, 1166, 1173, 1182, 1188, 1196 +}; +#endif + +#if YYDEBUG || YYERROR_VERBOSE || 0 +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at YYNTOKENS, nonterminals. 
*/ +static const char *const yytname[] = +{ + "$end", "error", "$undefined", "UNEXPECTED", "BAD_DECIMAL", "NL", + "SEMI_COLON", "LBRACE", "RBRACE", "LBOX", "RBOX", "COMMA", "IO_OUT", + "ASSIGN", "ADD_ASG", "SUB_ASG", "MUL_ASG", "DIV_ASG", "MOD_ASG", + "POW_ASG", "QMARK", "COLON", "OR", "AND", "IN", "MATCH", "EQ", "NEQ", + "LT", "LTE", "GT", "GTE", "CAT", "GETLINE", "PLUS", "MINUS", "MUL", + "DIV", "MOD", "NOT", "UMINUS", "IO_IN", "PIPE", "POW", "INC_or_DEC", + "DOLLAR", "FIELD", "LPAREN", "RPAREN", "DOUBLE", "STRING_", "RE", "ID", + "D_ID", "FUNCT_ID", "C_FUNCT_ID", "BUILTIN", "LENGTH", "PRINT", "PRINTF", + "SPLIT", "MATCH_FUNC", "SUB", "GSUB", "DO", "WHILE", "FOR", "BREAK", + "CONTINUE", "IF", "ELSE", "DELETE", "BEGIN", "END", "EXIT", "NEXT", + "RETURN", "FUNCTION", "INCLUDE", "$accept", "program", "program_block", + "PA_block", "$@1", "$@2", "$@3", "$@4", "$@5", "block", + "block_or_separator", "statement_list", "statement", "separator", "expr", + "$@6", "$@7", "$@8", "$@9", "cat_expr", "p_expr", "lvalue", "arglist", + "args", "builtin", "mark", "print", "pr_args", "arg2", "pr_direction", + "if_front", "else", "do", "while_front", "for1", "for2", "for3", + "bifunct_target_arr", "lvalue_arrwr", "array_loop_front", "field", + "split_front", "split_back", "re_arg", "return_statement", "getline", + "bifunct_target", "getline_file", "sub_or_gsub", "sub_back", + "function_def", "funct_start", "funct_head", "f_arglist", "f_args", + "outside_error", "call_args", "ca_front", "ca_back", "include", YY_NULLPTR +}; +#endif + +# ifdef YYPRINT +/* YYTOKNUM[NUM] -- (External) token number corresponding to the + (internal) symbol number NUM (which must be that of a token). 
*/ +static const yytype_uint16 yytoknum[] = +{ + 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, + 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, + 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, + 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, + 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, + 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, + 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, + 325, 326, 327, 328, 329, 330, 331, 332, 333 +}; +# endif + +#define YYPACT_NINF -225 + +#define yypact_value_is_default(Yystate) \ + (!!((Yystate) == (-225))) + +#define YYTABLE_NINF -123 + +#define yytable_value_is_error(Yytable_value) \ + (!!((Yytable_value) == (-123))) + + /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ +static const yytype_int16 yypact[] = +{ + 351, -225, -225, 486, -225, 811, 811, 811, 104, 718, + -225, 842, -225, -225, -225, 219, -225, -225, -225, -225, + -45, -10, -225, -225, -225, -225, 7, -9, 316, -225, + -225, -225, 1074, 811, 153, 1038, -225, 1073, 73, 1, + 32, 811, 4, -225, 48, 11, 48, -225, 30, -225, + -225, -225, -225, -225, 16, 18, 25, 25, 22, 19, + 625, 25, 625, -225, 414, -225, -225, 411, -225, 558, + 558, 558, 656, 558, -225, 842, 20, 120, 37, 120, + 120, 59, 67, -225, -225, -225, 67, -225, 475, 2, + 198, -225, 74, 38, 38, 46, 842, 842, 48, 48, + -225, -225, -225, -225, -225, -225, -225, -225, -225, 43, + 842, 842, 842, 842, 842, 842, 842, 169, 153, 811, + 811, 811, 811, 811, 66, 811, 842, 842, 842, 842, + 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, + 842, 842, 842, 842, 842, 842, 842, -225, 842, -225, + -225, -225, 67, -225, -225, -225, 60, 127, 842, -225, + 71, -225, -225, -225, 842, 687, -225, -225, 842, 25, + -225, 411, -225, -225, 411, 25, -225, -225, -225, 873, + 84, 101, -225, -225, 1041, 749, -225, 948, 163, 130, + 184, 192, 842, -225, 842, 203, -225, 842, 171, -225, + 904, -225, 842, 1095, 1116, -225, -225, 842, 842, 842, + 842, -225, 307, -225, -225, -225, -225, -225, -225, 
-225, + -225, -225, 216, 216, 120, 120, 120, 249, 196, 559, + 559, 559, 559, 559, 559, 559, 559, 559, 559, 559, + 559, 559, 559, 559, 559, 559, 559, 559, 559, 559, + 960, 231, -225, 559, 230, -225, 197, 233, 975, -225, + 259, 1053, 987, -225, 238, -225, -225, 780, 559, -225, + 237, 239, -225, 558, 208, -225, -225, 1002, 558, 842, + 842, 842, 559, 559, 204, 34, -225, 8, 547, -225, + 202, 209, 842, 559, 487, 626, 260, -225, -225, 842, + 842, -225, 210, -225, 212, -225, -225, 842, -225, 9, + 842, 842, 25, -225, 842, -225, -225, 188, 193, 195, + -225, 313, -225, -225, -225, -225, -225, -225, 217, 169, + -225, 211, 617, -225, 221, 214, 203, 559, 559, -225, + 1014, 226, -225, -225, -225, -225, -225, 842, -225, 249, + -225, -225, -225, 25, 25, 559, 223, -225, -225, -225 +}; + + /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. + Performed when YYTABLE does not specify something else to do. Zero + means the default is an error. */ +static const yytype_uint8 yydefact[] = +{ + 0, 176, 9, 0, 157, 0, 0, 0, 0, 0, + 129, 0, 66, 67, 70, 68, 93, 93, 93, 92, + 0, 0, 164, 165, 10, 12, 0, 0, 0, 2, + 4, 7, 14, 35, 64, 0, 80, 0, 134, 0, + 152, 0, 0, 5, 0, 0, 0, 8, 0, 33, + 34, 95, 96, 108, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 25, 0, 23, 27, 0, 93, 0, + 0, 0, 0, 0, 31, 0, 68, 78, 134, 79, + 77, 0, 86, 82, 83, 85, 130, 132, 0, 0, + 134, 81, 0, 0, 0, 0, 0, 0, 0, 0, + 170, 171, 186, 1, 3, 16, 61, 57, 59, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 65, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 84, 0, 144, + 142, 161, 86, 158, 159, 160, 153, 154, 0, 168, + 172, 6, 20, 28, 0, 0, 29, 30, 0, 93, + 148, 0, 32, 150, 0, 0, 19, 24, 26, 87, + 105, 0, 111, 115, 0, 0, 128, 0, 0, 0, + 0, 0, 0, 69, 0, 0, 133, 0, 181, 177, + 0, 178, 87, 0, 0, 11, 13, 0, 0, 0, + 0, 119, 56, 50, 51, 52, 53, 54, 55, 21, + 15, 22, 71, 72, 73, 74, 75, 155, 76, 36, + 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, + 47, 48, 49, 135, 136, 137, 138, 139, 140, 141, + 0, 0, 162, 147, 0, 174, 
0, 173, 0, 113, + 68, 0, 0, 126, 0, 149, 151, 0, 89, 97, + 88, 102, 106, 0, 0, 116, 117, 0, 0, 0, + 0, 0, 100, 101, 0, 0, 179, 68, 0, 180, + 0, 0, 0, 17, 0, 58, 60, 156, 145, 0, + 0, 169, 0, 110, 0, 114, 104, 0, 99, 0, + 0, 0, 0, 107, 0, 118, 112, 0, 0, 0, + 120, 123, 183, 185, 182, 184, 91, 143, 0, 0, + 62, 0, 0, 175, 0, 0, 98, 90, 103, 94, + 0, 123, 122, 131, 124, 146, 18, 0, 121, 0, + 166, 163, 127, 0, 0, 63, 0, 125, 109, 167 +}; + + /* YYPGOTO[NTERM-NUM]. */ +static const yytype_int16 yypgoto[] = +{ + -225, -225, 254, -225, -225, -225, -225, -225, -225, 54, + -36, -225, -55, -14, 0, -225, -225, -225, -225, -225, + 61, -7, 95, -50, -225, 91, -225, -225, 35, -225, + -225, -225, -225, -225, -225, -225, -225, -225, 296, -225, + -1, -225, -225, 13, -225, -225, -224, -225, -225, -225, + -225, -225, -225, -225, -225, -225, 215, -225, -225, -225 +}; + + /* YYDEFGOTO[NTERM-NUM]. */ +static const yytype_int16 yydefgoto[] = +{ + -1, 28, 29, 30, 98, 99, 117, 207, 329, 63, + 220, 64, 65, 66, 67, 209, 210, 208, 347, 33, + 34, 35, 269, 270, 36, 92, 68, 271, 89, 312, + 69, 273, 70, 71, 72, 185, 278, 154, 37, 73, + 38, 39, 150, 254, 74, 40, 156, 41, 42, 351, + 43, 44, 45, 256, 257, 46, 199, 200, 289, 47 +}; + + /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule whose + number is the opposite. If YYTABLE_NINF, syntax error. 
*/ +static const yytype_int16 yytable[] = +{ + 32, 83, 96, 297, 78, 78, 78, 85, 78, 177, + 90, 88, 148, 194, 180, 181, 182, -93, 186, 322, + 194, -86, -86, -86, -86, -86, -86, -86, 32, -93, + 49, 50, 78, 153, 163, 49, 50, 97, 162, 155, + 78, 102, 166, 167, 321, 310, 170, 172, 173, 149, + 195, 158, 91, 178, 31, 3, 323, 336, 160, 100, + 171, 101, 174, 164, 91, 165, 77, 79, 80, 168, + 87, 169, 184, 151, 90, 187, -93, 9, 10, 81, + 189, 147, 31, 197, 152, 198, 140, 141, 142, 143, + 144, 145, 146, 202, 118, 211, 203, 204, 159, 227, + 161, 252, 157, 221, 9, 10, 81, 93, 94, 95, + 212, 213, 214, 215, 216, 217, 218, 147, 78, 78, + 78, 78, 78, 255, 78, 356, 229, 230, 231, 232, + 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, + 243, 244, 245, 246, 247, 248, 249, 285, 250, 9, + 10, 81, 205, 206, 272, 263, 82, 265, 253, 179, + 266, 163, 124, 125, 258, 261, 274, 188, 262, -123, + 125, 219, 279, 190, 49, 50, 3, 191, 196, 268, + 222, 223, 224, 225, 226, 277, 228, 119, 120, 121, + 122, 123, 282, 280, 283, 124, 125, 268, 341, 310, + 288, 281, 268, 342, 310, 343, 310, 293, 294, 295, + 296, 140, 141, 142, 143, 144, 145, 146, 313, 286, + 153, 348, 310, 316, 353, 310, 155, 284, -93, 317, + 318, 319, -86, -86, -86, -86, -86, -86, -86, 125, + 299, 300, 147, 251, 302, 301, 196, 307, 310, 331, + 326, 311, 121, 122, 123, 314, 320, 335, 124, 125, + 264, 327, 333, 91, 334, 345, 90, 88, -93, 352, + 344, 359, -86, -86, -86, -86, -86, -86, -86, 268, + 268, 268, 104, 304, 109, 110, 111, 112, 113, 114, + 115, 116, 253, 346, 9, 10, 81, 290, 339, 268, + 332, 152, 309, 91, 84, 328, 0, 268, 0, 201, + 337, 338, 0, 0, 340, 221, 103, 1, 0, 0, + 0, 2, 0, 3, 0, 0, -122, -122, -122, -122, + -122, -122, -122, 111, 112, 113, 114, 115, 116, 357, + 358, 0, 153, 0, 0, 0, 0, 355, 155, 4, + 5, 6, 1, 0, 0, 7, 2, 344, 3, 0, + 8, 9, 10, 11, 0, 12, 13, 14, 15, 0, + 16, 17, 18, 19, 0, 0, 20, 21, 22, 23, + 0, 0, 0, 219, 4, 5, 6, 0, 24, 25, + 7, 0, 0, 26, 27, 8, 9, 10, 11, 0, + 12, 13, 14, 15, 0, 16, 17, 18, 19, 0, + 0, 
20, 21, 22, 23, 175, 49, 50, 0, 49, + 50, 3, 176, 24, 25, 0, 0, 0, 26, 27, + 0, 106, 0, 107, 108, 109, 110, 111, 112, 113, + 114, 115, 116, 0, 0, 0, 0, 4, 5, 6, + 0, 0, 0, 7, 0, 0, 0, 0, 8, 9, + 10, 11, 0, 12, 13, 14, 15, 0, 16, 17, + 18, 19, 51, 52, 20, 21, 22, 23, 53, 54, + 55, 56, 57, 58, 0, 59, 192, 48, 60, 61, + 62, 49, 50, 3, 0, 106, 0, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 106, 330, 107, + 108, 109, 110, 111, 112, 113, 114, 115, 116, 4, + 5, 6, 0, 193, 0, 7, 0, 0, 0, 0, + 8, 9, 10, 11, 0, 12, 13, 14, 15, 0, + 16, 17, 18, 19, 51, 52, 20, 21, 22, 23, + 53, 54, 55, 56, 57, 58, 0, 59, 324, 175, + 60, 61, 62, 49, 50, 3, 0, 106, 0, 107, + 108, 109, 110, 111, 112, 113, 114, 115, 116, 106, + 0, 107, 108, 109, 110, 111, 112, 113, 114, 115, + 116, 4, 5, 6, 0, 325, 0, 7, 0, 0, + 0, 0, 8, 9, 10, 11, 0, 12, 13, 14, + 15, 0, 16, 17, 18, 19, 51, 52, 20, 21, + 22, 23, 53, 54, 55, 56, 57, 58, 349, 59, + 49, 50, 60, 61, 62, 0, 0, 106, 0, 107, + 108, 109, 110, 111, 112, 113, 114, 115, 116, 108, + 109, 110, 111, 112, 113, 114, 115, 116, 4, 5, + 6, 0, 183, 0, 7, 350, 0, 0, 0, 8, + 9, 10, 11, 0, 12, 13, 14, 15, 0, 16, + 17, 18, 19, 0, 0, 20, 21, 22, 23, 4, + 5, 6, 0, 259, 0, 7, 0, 0, 0, 0, + 8, 9, 10, 11, 0, 12, 13, 14, 15, 0, + 16, 17, 18, 19, 0, 0, 20, 21, 22, 23, + 4, 5, 6, 0, 0, 0, 7, 0, 0, 0, + 0, 8, 9, 10, 11, 0, 12, 13, 14, 260, + 0, 16, 17, 18, 19, 0, 0, 20, 21, 22, + 23, 4, 5, 6, 0, 0, 0, 7, 0, 0, + 0, 0, 8, 9, 10, 75, 0, 12, 13, 14, + 76, 86, 16, 17, 18, 19, 0, 0, 20, 21, + 22, 23, 4, 5, 6, 0, 0, 0, 7, 0, + 0, 0, 0, 8, 9, 10, 11, 276, 12, 13, + 14, 15, 0, 16, 17, 18, 19, 0, 0, 20, + 21, 22, 23, 4, 5, 6, 0, 0, 0, 7, + 0, 0, 0, 0, 8, 9, 10, 11, 308, 12, + 13, 14, 15, 0, 16, 17, 18, 19, 0, 0, + 20, 21, 22, 23, 4, 5, 6, 0, 0, 0, + 7, 0, 0, 0, 0, 8, 9, 10, 75, 0, + 12, 13, 14, 76, 0, 16, 17, 18, 19, 0, + 0, 20, 21, 22, 23, 4, 5, 6, 0, 0, + 0, 7, 0, 0, 0, 0, 8, 9, 10, 11, + 0, 12, 13, 14, 15, 0, 16, 17, 18, 19, + 0, 0, 20, 21, 22, 23, 4, 5, 6, 0, + 0, 0, 7, 0, 
0, 0, 0, 8, 9, 10, + 267, 0, 12, 13, 14, 15, 0, 16, 17, 18, + 19, 0, 0, 20, 21, 22, 23, 4, 5, 6, + 0, 0, 0, 7, 0, 0, 0, 0, 8, 9, + 10, 11, 0, 12, 13, 14, 287, 0, 16, 17, + 18, 19, 0, 0, 20, 21, 22, 23, 106, 0, + 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, + 106, 0, 107, 108, 109, 110, 111, 112, 113, 114, + 115, 116, 0, 0, 0, 106, 193, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 106, 298, 107, + 108, 109, 110, 111, 112, 113, 114, 115, 116, 0, + 0, 0, 106, 303, 107, 108, 109, 110, 111, 112, + 113, 114, 115, 116, 106, 306, 107, 108, 109, 110, + 111, 112, 113, 114, 115, 116, 0, 275, 0, 0, + 315, 126, 127, 128, 129, 130, 131, 132, 0, 305, + 0, 106, 354, 107, 108, 109, 110, 111, 112, 113, + 114, 115, 116, 106, 0, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 105, 133, 134, 135, 136, + 137, 138, 139, 0, 106, 0, 107, 108, 109, 110, + 111, 112, 113, 114, 115, 116, 291, 0, 0, 0, + 0, 0, 0, 0, 0, 106, 0, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 292, 0, 0, + 0, 0, 0, 0, 0, 0, 106, 0, 107, 108, + 109, 110, 111, 112, 113, 114, 115, 116 +}; + +static const yytype_int16 yycheck[] = +{ + 0, 8, 47, 227, 5, 6, 7, 8, 9, 64, + 11, 11, 11, 11, 69, 70, 71, 9, 73, 11, + 11, 13, 14, 15, 16, 17, 18, 19, 28, 9, + 5, 6, 33, 40, 48, 5, 6, 47, 8, 40, + 41, 50, 56, 57, 10, 11, 60, 61, 62, 48, + 48, 47, 44, 67, 0, 7, 48, 48, 47, 52, + 60, 54, 62, 47, 44, 47, 5, 6, 7, 47, + 9, 52, 72, 41, 75, 75, 9, 45, 46, 47, + 81, 44, 28, 9, 52, 47, 13, 14, 15, 16, + 17, 18, 19, 47, 33, 52, 96, 97, 44, 33, + 46, 41, 41, 117, 45, 46, 47, 16, 17, 18, + 110, 111, 112, 113, 114, 115, 116, 44, 119, 120, + 121, 122, 123, 52, 125, 349, 126, 127, 128, 129, + 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, + 140, 141, 142, 143, 144, 145, 146, 197, 148, 45, + 46, 47, 98, 99, 70, 169, 52, 171, 158, 68, + 174, 175, 42, 43, 164, 165, 65, 76, 168, 42, + 43, 117, 9, 82, 5, 6, 7, 86, 48, 179, + 119, 120, 121, 122, 123, 185, 125, 34, 35, 36, + 37, 38, 192, 9, 194, 42, 43, 197, 10, 11, + 200, 9, 
202, 10, 11, 10, 11, 207, 208, 209, + 210, 13, 14, 15, 16, 17, 18, 19, 273, 48, + 227, 10, 11, 278, 10, 11, 227, 24, 9, 279, + 280, 281, 13, 14, 15, 16, 17, 18, 19, 43, + 9, 11, 44, 152, 11, 48, 48, 9, 11, 299, + 48, 12, 36, 37, 38, 47, 52, 307, 42, 43, + 169, 52, 52, 44, 52, 48, 267, 267, 9, 48, + 44, 48, 13, 14, 15, 16, 17, 18, 19, 279, + 280, 281, 28, 24, 24, 25, 26, 27, 28, 29, + 30, 31, 292, 329, 45, 46, 47, 202, 312, 299, + 300, 52, 267, 44, 8, 292, -1, 307, -1, 94, + 310, 311, -1, -1, 314, 329, 0, 1, -1, -1, + -1, 5, -1, 7, -1, -1, 13, 14, 15, 16, + 17, 18, 19, 26, 27, 28, 29, 30, 31, 353, + 354, -1, 349, -1, -1, -1, -1, 347, 349, 33, + 34, 35, 1, -1, -1, 39, 5, 44, 7, -1, + 44, 45, 46, 47, -1, 49, 50, 51, 52, -1, + 54, 55, 56, 57, -1, -1, 60, 61, 62, 63, + -1, -1, -1, 329, 33, 34, 35, -1, 72, 73, + 39, -1, -1, 77, 78, 44, 45, 46, 47, -1, + 49, 50, 51, 52, -1, 54, 55, 56, 57, -1, + -1, 60, 61, 62, 63, 1, 5, 6, -1, 5, + 6, 7, 8, 72, 73, -1, -1, -1, 77, 78, + -1, 20, -1, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31, -1, -1, -1, -1, 33, 34, 35, + -1, -1, -1, 39, -1, -1, -1, -1, 44, 45, + 46, 47, -1, 49, 50, 51, 52, -1, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, + 66, 67, 68, 69, -1, 71, 11, 1, 74, 75, + 76, 5, 6, 7, -1, 20, -1, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, + 34, 35, -1, 48, -1, 39, -1, -1, -1, -1, + 44, 45, 46, 47, -1, 49, 50, 51, 52, -1, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, -1, 71, 11, 1, + 74, 75, 76, 5, 6, 7, -1, 20, -1, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 20, + -1, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 33, 34, 35, -1, 48, -1, 39, -1, -1, + -1, -1, 44, 45, 46, 47, -1, 49, 50, 51, + 52, -1, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 64, 65, 66, 67, 68, 69, 11, 71, + 5, 6, 74, 75, 76, -1, -1, 20, -1, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, + 35, -1, 6, -1, 39, 48, -1, -1, -1, 44, + 45, 46, 47, -1, 49, 50, 
51, 52, -1, 54, + 55, 56, 57, -1, -1, 60, 61, 62, 63, 33, + 34, 35, -1, 6, -1, 39, -1, -1, -1, -1, + 44, 45, 46, 47, -1, 49, 50, 51, 52, -1, + 54, 55, 56, 57, -1, -1, 60, 61, 62, 63, + 33, 34, 35, -1, -1, -1, 39, -1, -1, -1, + -1, 44, 45, 46, 47, -1, 49, 50, 51, 52, + -1, 54, 55, 56, 57, -1, -1, 60, 61, 62, + 63, 33, 34, 35, -1, -1, -1, 39, -1, -1, + -1, -1, 44, 45, 46, 47, -1, 49, 50, 51, + 52, 53, 54, 55, 56, 57, -1, -1, 60, 61, + 62, 63, 33, 34, 35, -1, -1, -1, 39, -1, + -1, -1, -1, 44, 45, 46, 47, 48, 49, 50, + 51, 52, -1, 54, 55, 56, 57, -1, -1, 60, + 61, 62, 63, 33, 34, 35, -1, -1, -1, 39, + -1, -1, -1, -1, 44, 45, 46, 47, 48, 49, + 50, 51, 52, -1, 54, 55, 56, 57, -1, -1, + 60, 61, 62, 63, 33, 34, 35, -1, -1, -1, + 39, -1, -1, -1, -1, 44, 45, 46, 47, -1, + 49, 50, 51, 52, -1, 54, 55, 56, 57, -1, + -1, 60, 61, 62, 63, 33, 34, 35, -1, -1, + -1, 39, -1, -1, -1, -1, 44, 45, 46, 47, + -1, 49, 50, 51, 52, -1, 54, 55, 56, 57, + -1, -1, 60, 61, 62, 63, 33, 34, 35, -1, + -1, -1, 39, -1, -1, -1, -1, 44, 45, 46, + 47, -1, 49, 50, 51, 52, -1, 54, 55, 56, + 57, -1, -1, 60, 61, 62, 63, 33, 34, 35, + -1, -1, -1, 39, -1, -1, -1, -1, 44, 45, + 46, 47, -1, 49, 50, 51, 52, -1, 54, 55, + 56, 57, -1, -1, 60, 61, 62, 63, 20, -1, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 20, -1, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, -1, -1, -1, 20, 48, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 20, 48, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, -1, + -1, -1, 20, 48, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 20, 48, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, -1, 6, -1, -1, + 48, 13, 14, 15, 16, 17, 18, 19, -1, 6, + -1, 20, 48, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31, 20, -1, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 11, 13, 14, 15, 16, + 17, 18, 19, -1, 20, -1, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 11, -1, -1, -1, + -1, -1, -1, -1, -1, 20, -1, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 11, -1, -1, + -1, -1, -1, -1, -1, -1, 20, -1, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 +}; + + /* 
YYSTOS[STATE-NUM] -- The (internal number of the) accessing + symbol of state STATE-NUM. */ +static const yytype_uint8 yystos[] = +{ + 0, 1, 5, 7, 33, 34, 35, 39, 44, 45, + 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, + 60, 61, 62, 63, 72, 73, 77, 78, 80, 81, + 82, 88, 93, 98, 99, 100, 103, 117, 119, 120, + 124, 126, 127, 129, 130, 131, 134, 138, 1, 5, + 6, 58, 59, 64, 65, 66, 67, 68, 69, 71, + 74, 75, 76, 88, 90, 91, 92, 93, 105, 109, + 111, 112, 113, 118, 123, 47, 52, 99, 119, 99, + 99, 47, 52, 100, 117, 119, 53, 99, 93, 107, + 119, 44, 104, 104, 104, 104, 47, 47, 83, 84, + 52, 54, 50, 0, 81, 11, 20, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 85, 99, 34, + 35, 36, 37, 38, 42, 43, 13, 14, 15, 16, + 17, 18, 19, 13, 14, 15, 16, 17, 18, 19, + 13, 14, 15, 16, 17, 18, 19, 44, 11, 48, + 121, 41, 52, 100, 116, 119, 125, 99, 47, 88, + 47, 88, 8, 92, 47, 47, 92, 92, 47, 52, + 92, 93, 92, 92, 93, 1, 8, 91, 92, 104, + 91, 91, 91, 6, 93, 114, 91, 93, 104, 119, + 104, 104, 11, 48, 11, 48, 48, 9, 47, 135, + 136, 135, 47, 93, 93, 88, 88, 86, 96, 94, + 95, 52, 93, 93, 93, 93, 93, 93, 93, 88, + 89, 92, 99, 99, 99, 99, 99, 33, 99, 93, + 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, + 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, + 93, 104, 41, 93, 122, 52, 132, 133, 93, 6, + 52, 93, 93, 92, 104, 92, 92, 47, 93, 101, + 102, 106, 70, 110, 65, 6, 48, 93, 115, 9, + 9, 9, 93, 93, 24, 102, 48, 52, 93, 137, + 101, 11, 11, 93, 93, 93, 93, 125, 48, 9, + 11, 48, 11, 48, 24, 6, 48, 9, 48, 107, + 11, 12, 108, 91, 47, 48, 91, 102, 102, 102, + 52, 10, 11, 48, 11, 48, 48, 52, 122, 87, + 21, 102, 93, 52, 52, 102, 48, 93, 93, 92, + 93, 10, 10, 10, 44, 48, 89, 97, 10, 11, + 48, 128, 48, 10, 48, 93, 125, 92, 92, 48 +}; + + /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. 
*/ +static const yytype_uint8 yyr1[] = +{ + 0, 79, 80, 80, 81, 81, 81, 82, 82, 82, + 83, 82, 84, 82, 85, 82, 86, 87, 82, 88, + 88, 89, 89, 90, 90, 91, 91, 91, 91, 91, + 91, 91, 91, 92, 92, 93, 93, 93, 93, 93, + 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, + 93, 93, 93, 93, 93, 93, 93, 94, 93, 95, + 93, 96, 97, 93, 98, 98, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 100, 101, 101, 102, + 102, 103, 103, 104, 91, 105, 105, 106, 106, 106, + 107, 107, 108, 108, 109, 91, 110, 91, 111, 91, + 112, 91, 91, 113, 113, 114, 114, 115, 115, 93, + 93, 116, 117, 99, 99, 91, 91, 118, 91, 119, + 119, 119, 119, 119, 99, 93, 93, 93, 93, 93, + 93, 93, 99, 120, 121, 121, 99, 122, 91, 91, + 123, 123, 99, 99, 99, 99, 99, 124, 125, 125, + 125, 126, 126, 99, 127, 127, 128, 128, 129, 130, + 131, 131, 132, 132, 133, 133, 134, 99, 99, 135, + 135, 136, 136, 136, 137, 137, 138 +}; + + /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */ +static const yytype_uint8 yyr2[] = +{ + 0, 2, 1, 2, 1, 1, 2, 1, 1, 1, + 0, 3, 0, 3, 0, 3, 0, 0, 6, 3, + 3, 1, 1, 1, 2, 1, 2, 1, 2, 2, + 2, 1, 2, 1, 1, 1, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 0, 4, 0, + 4, 0, 0, 7, 1, 2, 1, 1, 1, 3, + 1, 3, 3, 3, 3, 3, 3, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 1, 0, 1, 1, + 3, 5, 1, 0, 5, 1, 1, 1, 3, 2, + 3, 3, 0, 2, 4, 2, 1, 4, 1, 7, + 4, 2, 4, 3, 4, 1, 2, 1, 2, 3, + 5, 5, 5, 5, 6, 7, 3, 6, 2, 1, + 2, 6, 2, 3, 1, 3, 3, 3, 3, 3, + 3, 3, 2, 5, 1, 3, 6, 1, 2, 3, + 2, 3, 1, 2, 2, 3, 4, 1, 1, 1, + 1, 2, 3, 6, 1, 1, 1, 3, 2, 4, + 2, 2, 0, 1, 1, 3, 1, 3, 3, 2, + 2, 1, 3, 3, 2, 2, 2 +}; + + +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = YYEMPTY) +#define YYEMPTY (-2) +#define YYEOF 0 + +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrorlab + + +#define YYRECOVERING() (!!yyerrstatus) + +#define YYBACKUP(Token, Value) \ +do \ + if (yychar == YYEMPTY) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + 
YYPOPSTACK (yylen); \ + yystate = *yyssp; \ + goto yybackup; \ + } \ + else \ + { \ + yyerror (MAWK, YY_("syntax error: cannot back up")); \ + YYERROR; \ + } \ +while (0) + +/* Error token number */ +#define YYTERROR 1 +#define YYERRCODE 256 + + + +/* Enable debugging if requested. */ +#if YYDEBUG + +# ifndef YYFPRINTF +# include /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif + +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (0) + +/* This macro is provided for backward compatibility. */ +#ifndef YY_LOCATION_PRINT +# define YY_LOCATION_PRINT(File, Loc) ((void) 0) +#endif + + +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yy_symbol_print (stderr, \ + Type, Value, MAWK); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (0) + + +/*----------------------------------------. +| Print this symbol's value on YYOUTPUT. | +`----------------------------------------*/ + +static void +yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, mawk_state_t *MAWK) +{ + FILE *yyo = yyoutput; + YYUSE (yyo); + YYUSE (MAWK); + if (!yyvaluep) + return; +# ifdef YYPRINT + if (yytype < YYNTOKENS) + YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); +# endif + YYUSE (yytype); +} + + +/*--------------------------------. +| Print this symbol on YYOUTPUT. | +`--------------------------------*/ + +static void +yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, mawk_state_t *MAWK) +{ + YYFPRINTF (yyoutput, "%s %s (", + yytype < YYNTOKENS ? "token" : "nterm", yytname[yytype]); + + yy_symbol_value_print (yyoutput, yytype, yyvaluep, MAWK); + YYFPRINTF (yyoutput, ")"); +} + +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (included). 
| +`------------------------------------------------------------------*/ + +static void +yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop) +{ + YYFPRINTF (stderr, "Stack now"); + for (; yybottom <= yytop; yybottom++) + { + int yybot = *yybottom; + YYFPRINTF (stderr, " %d", yybot); + } + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)); \ +} while (0) + + +/*------------------------------------------------. +| Report that the YYRULE is going to be reduced. | +`------------------------------------------------*/ + +static void +yy_reduce_print (yytype_int16 *yyssp, YYSTYPE *yyvsp, int yyrule, mawk_state_t *MAWK) +{ + unsigned long int yylno = yyrline[yyrule]; + int yynrhs = yyr2[yyrule]; + int yyi; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", + yyrule - 1, yylno); + /* The symbols being reduced. */ + for (yyi = 0; yyi < yynrhs; yyi++) + { + YYFPRINTF (stderr, " $%d = ", yyi + 1); + yy_symbol_print (stderr, + yystos[yyssp[yyi + 1 - yynrhs]], + &(yyvsp[(yyi + 1) - (yynrhs)]) + , MAWK); + YYFPRINTF (stderr, "\n"); + } +} + +# define YY_REDUCE_PRINT(Rule) \ +do { \ + if (yydebug) \ + yy_reduce_print (yyssp, yyvsp, Rule, MAWK); \ +} while (0) + +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. */ +int yydebug; +#else /* !YYDEBUG */ +# define YYDPRINTF(Args) +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) +# define YY_STACK_PRINT(Bottom, Top) +# define YY_REDUCE_PRINT(Rule) +#endif /* !YYDEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. */ +#ifndef YYINITDEPTH +# define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). + + Do not make this value too large; the results are undefined if + YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. 
*/ + +#ifndef YYMAXDEPTH +# define YYMAXDEPTH 10000 +#endif + + +#if YYERROR_VERBOSE + +# ifndef yystrlen +# if defined __GLIBC__ && defined _STRING_H +# define yystrlen strlen +# else +/* Return the length of YYSTR. */ +static YYSIZE_T +yystrlen (const char *yystr) +{ + YYSIZE_T yylen; + for (yylen = 0; yystr[yylen]; yylen++) + continue; + return yylen; +} +# endif +# endif + +# ifndef yystpcpy +# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE +# define yystpcpy stpcpy +# else +/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in + YYDEST. */ +static char * +yystpcpy (char *yydest, const char *yysrc) +{ + char *yyd = yydest; + const char *yys = yysrc; + + while ((*yyd++ = *yys++) != '\0') + continue; + + return yyd - 1; +} +# endif +# endif + +# ifndef yytnamerr +/* Copy to YYRES the contents of YYSTR after stripping away unnecessary + quotes and backslashes, so that it's suitable for yyerror. The + heuristic is that double-quoting is unnecessary unless the string + contains an apostrophe, a comma, or backslash (other than + backslash-backslash). YYSTR is taken from yytname. If YYRES is + null, do not copy; instead, return the length of what the result + would have been. */ +static YYSIZE_T +yytnamerr (char *yyres, const char *yystr) +{ + if (*yystr == '"') + { + YYSIZE_T yyn = 0; + char const *yyp = yystr; + + for (;;) + switch (*++yyp) + { + case '\'': + case ',': + goto do_not_strip_quotes; + + case '\\': + if (*++yyp != '\\') + goto do_not_strip_quotes; + /* Fall through. */ + default: + if (yyres) + yyres[yyn] = *yyp; + yyn++; + break; + + case '"': + if (yyres) + yyres[yyn] = '\0'; + return yyn; + } + do_not_strip_quotes: ; + } + + if (! yyres) + return yystrlen (yystr); + + return yystpcpy (yyres, yystr) - yyres; +} +# endif + +/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message + about the unexpected token YYTOKEN for the state stack whose top is + YYSSP. 
+ + Return 0 if *YYMSG was successfully written. Return 1 if *YYMSG is + not large enough to hold the message. In that case, also set + *YYMSG_ALLOC to the required number of bytes. Return 2 if the + required number of bytes is too large to store. */ +static int +yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, + yytype_int16 *yyssp, int yytoken) +{ + YYSIZE_T yysize0 = yytnamerr (YY_NULLPTR, yytname[yytoken]); + YYSIZE_T yysize = yysize0; + enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; + /* Internationalized format string. */ + const char *yyformat = YY_NULLPTR; + /* Arguments of yyformat. */ + char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; + /* Number of reported tokens (one for the "unexpected", one per + "expected"). */ + int yycount = 0; + + /* There are many possibilities here to consider: + - If this state is a consistent state with a default action, then + the only way this function was invoked is if the default action + is an error action. In that case, don't check for expected + tokens because there are none. + - The only way there can be no lookahead present (in yychar) is if + this state is a consistent state with a default action. Thus, + detecting the absence of a lookahead is sufficient to determine + that there is no unexpected or expected token to report. In that + case, just report a simple "syntax error". + - Don't assume there isn't a lookahead just because this state is a + consistent state with a default action. There might have been a + previous inconsistent state, consistent state with a non-default + action, or user semantic action that manipulated yychar. + - Of course, the expected token list depends on states to have + correct lookahead information, and it depends on the parser not + to perform extra reductions after fetching a lookahead from the + scanner and before detecting a syntax error. Thus, state merging + (from LALR or IELR) and default reductions corrupt the expected + token list. 
However, the list is correct for canonical LR with + one exception: it will still contain any token that will not be + accepted due to an error action in a later state. + */ + if (yytoken != YYEMPTY) + { + int yyn = yypact[*yyssp]; + yyarg[yycount++] = yytname[yytoken]; + if (!yypact_value_is_default (yyn)) + { + /* Start YYX at -YYN if negative to avoid negative indexes in + YYCHECK. In other words, skip the first -YYN actions for + this state because they are default actions. */ + int yyxbegin = yyn < 0 ? -yyn : 0; + /* Stay within bounds of both yycheck and yytname. */ + int yychecklim = YYLAST - yyn + 1; + int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; + int yyx; + + for (yyx = yyxbegin; yyx < yyxend; ++yyx) + if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR + && !yytable_value_is_error (yytable[yyx + yyn])) + { + if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) + { + yycount = 1; + yysize = yysize0; + break; + } + yyarg[yycount++] = yytname[yyx]; + { + YYSIZE_T yysize1 = yysize + yytnamerr (YY_NULLPTR, yytname[yyx]); + if (! (yysize <= yysize1 + && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) + return 2; + yysize = yysize1; + } + } + } + } + + switch (yycount) + { +# define YYCASE_(N, S) \ + case N: \ + yyformat = S; \ + break + YYCASE_(0, YY_("syntax error")); + YYCASE_(1, YY_("syntax error, unexpected %s")); + YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s")); + YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s")); + YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s")); + YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s")); +# undef YYCASE_ + } + + { + YYSIZE_T yysize1 = yysize + yystrlen (yyformat); + if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) + return 2; + yysize = yysize1; + } + + if (*yymsg_alloc < yysize) + { + *yymsg_alloc = 2 * yysize; + if (! 
(yysize <= *yymsg_alloc + && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM)) + *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM; + return 1; + } + + /* Avoid sprintf, as that infringes on the user's name space. + Don't have undefined behavior even if the translation + produced a string with the wrong number of "%s"s. */ + { + char *yyp = *yymsg; + int yyi = 0; + while ((*yyp = *yyformat) != '\0') + if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) + { + yyp += yytnamerr (yyp, yyarg[yyi++]); + yyformat += 2; + } + else + { + yyp++; + yyformat++; + } + } + return 0; +} +#endif /* YYERROR_VERBOSE */ + +/*-----------------------------------------------. +| Release the memory associated to this symbol. | +`-----------------------------------------------*/ + +static void +yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep, mawk_state_t *MAWK) +{ + YYUSE (yyvaluep); + YYUSE (MAWK); + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + YYUSE (yytype); + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + + + +/*----------. +| yyparse. | +`----------*/ + +int +yyparse (mawk_state_t *MAWK) +{ +/* The lookahead symbol. */ +int yychar; + + +/* The semantic value of the lookahead symbol. */ +/* Default value used for initialization, for pacifying older GCCs + or non-GCC compilers. */ +YY_INITIAL_VALUE (static YYSTYPE yyval_default;) +YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); + + /* Number of syntax errors so far. */ + int yynerrs; + + int yystate; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus; + + /* The stacks and their tools: + 'yyss': related to states. + 'yyvs': related to semantic values. + + Refer to the stacks through separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* The state stack. */ + yytype_int16 yyssa[YYINITDEPTH]; + yytype_int16 *yyss; + yytype_int16 *yyssp; + + /* The semantic value stack. 
*/ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs; + YYSTYPE *yyvsp; + + YYSIZE_T yystacksize; + + int yyn; + int yyresult; + /* Lookahead token as an internal (translated) token number. */ + int yytoken = 0; + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + +#if YYERROR_VERBOSE + /* Buffer for error messages, and its allocated size. */ + char yymsgbuf[128]; + char *yymsg = yymsgbuf; + YYSIZE_T yymsg_alloc = sizeof yymsgbuf; +#endif + +#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) + + /* The number of symbols on the RHS of the reduced rule. + Keep to zero when no symbol should be popped. */ + int yylen = 0; + + yyssp = yyss = yyssa; + yyvsp = yyvs = yyvsa; + yystacksize = YYINITDEPTH; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yystate = 0; + yyerrstatus = 0; + yynerrs = 0; + yychar = YYEMPTY; /* Cause a token to be read. */ + goto yysetstate; + +/*------------------------------------------------------------. +| yynewstate -- Push a new state, which is found in yystate. | +`------------------------------------------------------------*/ + yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. So pushing a state here evens the stacks. */ + yyssp++; + + yysetstate: + *yyssp = yystate; + + if (yyss + yystacksize - 1 <= yyssp) + { + /* Get the current used size of the three stacks, in elements. */ + YYSIZE_T yysize = yyssp - yyss + 1; + +#ifdef yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + YYSTYPE *yyvs1 = yyvs; + yytype_int16 *yyss1 = yyss; + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. 
*/ + yyoverflow (YY_("memory exhausted"), + &yyss1, yysize * sizeof (*yyssp), + &yyvs1, yysize * sizeof (*yyvsp), + &yystacksize); + + yyss = yyss1; + yyvs = yyvs1; + } +#else /* no yyoverflow */ +# ifndef YYSTACK_RELOCATE + goto yyexhaustedlab; +# else + /* Extend the stack our own way. */ + if (YYMAXDEPTH <= yystacksize) + goto yyexhaustedlab; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + yytype_int16 *yyss1 = yyss; + union yyalloc *yyptr = + (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); + if (! yyptr) + goto yyexhaustedlab; + YYSTACK_RELOCATE (yyss_alloc, yyss); + YYSTACK_RELOCATE (yyvs_alloc, yyvs); +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif +#endif /* no yyoverflow */ + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + + YYDPRINTF ((stderr, "Stack size increased to %lu\n", + (unsigned long int) yystacksize)); + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } + + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + + if (yystate == YYFINAL) + YYACCEPT; + + goto yybackup; + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + + /* Do appropriate processing given the current state. Read a + lookahead token if we need one and don't already have one. */ + + /* First try to decide what to do without reference to lookahead token. */ + yyn = yypact[yystate]; + if (yypact_value_is_default (yyn)) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + + /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. 
*/ + if (yychar == YYEMPTY) + { + YYDPRINTF ((stderr, "Reading a token: ")); + yychar = yylex (&yylval, MAWK); + } + + if (yychar <= YYEOF) + { + yychar = yytoken = YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else + { + yytoken = YYTRANSLATE (yychar); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. */ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yytable_value_is_error (yyn)) + goto yyerrlab; + yyn = -yyn; + goto yyreduce; + } + + /* Count tokens shifted since error; after three, turn off error + status. */ + if (yyerrstatus) + yyerrstatus--; + + /* Shift the lookahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); + + /* Discard the shifted token. */ + yychar = YYEMPTY; + + yystate = yyn; + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- Do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + '$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. 
*/ + yyval = yyvsp[1-yylen]; + + + YY_REDUCE_PRINT (yyn); + switch (yyn) + { + case 7: +#line 223 "parse.y" /* yacc.c:1646 */ + { /* this do nothing action removes a vacuous warning + from Bison */ + } +#line 1849 "y.tab.c" /* yacc.c:1646 */ + break; + + case 10: +#line 232 "parse.y" /* yacc.c:1646 */ + { mawk_be_setup(MAWK, MAWK->scope = SCOPE_BEGIN) ; } +#line 1855 "y.tab.c" /* yacc.c:1646 */ + break; + + case 11: +#line 235 "parse.y" /* yacc.c:1646 */ + { switch_code_to_main(MAWK) ; } +#line 1861 "y.tab.c" /* yacc.c:1646 */ + break; + + case 12: +#line 238 "parse.y" /* yacc.c:1646 */ + { mawk_be_setup(MAWK, MAWK->scope = SCOPE_END) ; } +#line 1867 "y.tab.c" /* yacc.c:1646 */ + break; + + case 13: +#line 241 "parse.y" /* yacc.c:1646 */ + { switch_code_to_main(MAWK) ; } +#line 1873 "y.tab.c" /* yacc.c:1646 */ + break; + + case 14: +#line 244 "parse.y" /* yacc.c:1646 */ + { mawk_code_jmp(MAWK, _JZ, (INST*)0) ; } +#line 1879 "y.tab.c" /* yacc.c:1646 */ + break; + + case 15: +#line 247 "parse.y" /* yacc.c:1646 */ + { mawk_patch_jmp(MAWK, mawk_code_ptr ) ; } +#line 1885 "y.tab.c" /* yacc.c:1646 */ + break; + + case 16: +#line 251 "parse.y" /* yacc.c:1646 */ + { + INST *p1 = CDP((yyvsp[-1].start)) ; + int len ; + + mawk_code_push(MAWK, p1, mawk_code_ptr - p1, MAWK->scope, MAWK->active_funct) ; + mawk_code_ptr = p1 ; + + mawk_code2op(MAWK, _RANGE_CHK, 1) ; + mawk_code_ptr += 3 ; + len = mawk_code_pop(MAWK, mawk_code_ptr) ; + mawk_code_ptr += len ; + code1(_RANGE_STOP) ; + p1 = CDP((yyvsp[-1].start)) ; + p1[2].op = mawk_code_ptr - (p1+1) ; + } +#line 1905 "y.tab.c" /* yacc.c:1646 */ + break; + + case 17: +#line 267 "parse.y" /* yacc.c:1646 */ + { code1(_RANGE_STOP) ; } +#line 1911 "y.tab.c" /* yacc.c:1646 */ + break; + + case 18: +#line 270 "parse.y" /* yacc.c:1646 */ + { + INST *p1 = CDP((yyvsp[-5].start)) ; + + p1[3].op = CDP((yyvsp[0].start)) - (p1+1) ; + p1[4].op = mawk_code_ptr - (p1+1) ; + } +#line 1922 "y.tab.c" /* yacc.c:1646 */ + break; + + case 19: +#line 281 
"parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-1].start) ; } +#line 1928 "y.tab.c" /* yacc.c:1646 */ + break; + + case 20: +#line 283 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; /* does nothing won't be mawk_executed */ + MAWK->print_flag = MAWK->getline_flag = MAWK->paren_cnt = 0 ; + yyerrok ; } +#line 1936 "y.tab.c" /* yacc.c:1646 */ + break; + + case 22: +#line 290 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; + code1(_PUSHINT) ; code1(0) ; + code2(MAWK, _PRINT, mawk_f2d(mawk_bi_print)) ; + } +#line 1945 "y.tab.c" /* yacc.c:1646 */ + break; + + case 26: +#line 302 "parse.y" /* yacc.c:1646 */ + { code1(_POP) ; } +#line 1951 "y.tab.c" /* yacc.c:1646 */ + break; + + case 27: +#line 304 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; } +#line 1957 "y.tab.c" /* yacc.c:1646 */ + break; + + case 28: +#line 306 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; + MAWK->print_flag = MAWK->getline_flag = 0 ; + MAWK->paren_cnt = 0 ; + yyerrok ; + } +#line 1967 "y.tab.c" /* yacc.c:1646 */ + break; + + case 29: +#line 312 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; mawk_BC_insert(MAWK, 'B', mawk_code_ptr+1) ; + code2(MAWK, _JMP, 0) /* don't use mawk_code_jmp ! 
*/ ; } +#line 1974 "y.tab.c" /* yacc.c:1646 */ + break; + + case 30: +#line 315 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; mawk_BC_insert(MAWK, 'C', mawk_code_ptr+1) ; + code2(MAWK, _JMP, 0) ; } +#line 1981 "y.tab.c" /* yacc.c:1646 */ + break; + + case 31: +#line 318 "parse.y" /* yacc.c:1646 */ + { if ( MAWK->scope != SCOPE_FUNCT ) + mawk_compile_error(MAWK, "return outside function body") ; + } +#line 1989 "y.tab.c" /* yacc.c:1646 */ + break; + + case 32: +#line 322 "parse.y" /* yacc.c:1646 */ + { if ( MAWK->scope != SCOPE_MAIN ) + mawk_compile_error(MAWK, "improper use of next" ) ; + (yyval.start) = mawk_code_offset ; + code1(_NEXT) ; + } +#line 1999 "y.tab.c" /* yacc.c:1646 */ + break; + + case 36: +#line 333 "parse.y" /* yacc.c:1646 */ + { code1(_ASSIGN) ; } +#line 2005 "y.tab.c" /* yacc.c:1646 */ + break; + + case 37: +#line 334 "parse.y" /* yacc.c:1646 */ + { code1(_ADD_ASG) ; } +#line 2011 "y.tab.c" /* yacc.c:1646 */ + break; + + case 38: +#line 335 "parse.y" /* yacc.c:1646 */ + { code1(_SUB_ASG) ; } +#line 2017 "y.tab.c" /* yacc.c:1646 */ + break; + + case 39: +#line 336 "parse.y" /* yacc.c:1646 */ + { code1(_MUL_ASG) ; } +#line 2023 "y.tab.c" /* yacc.c:1646 */ + break; + + case 40: +#line 337 "parse.y" /* yacc.c:1646 */ + { code1(_DIV_ASG) ; } +#line 2029 "y.tab.c" /* yacc.c:1646 */ + break; + + case 41: +#line 338 "parse.y" /* yacc.c:1646 */ + { code1(_MOD_ASG) ; } +#line 2035 "y.tab.c" /* yacc.c:1646 */ + break; + + case 42: +#line 339 "parse.y" /* yacc.c:1646 */ + { code1(_POW_ASG) ; } +#line 2041 "y.tab.c" /* yacc.c:1646 */ + break; + + case 43: +#line 340 "parse.y" /* yacc.c:1646 */ + { code1(_ASSIGN_ARR) ; } +#line 2047 "y.tab.c" /* yacc.c:1646 */ + break; + + case 44: +#line 341 "parse.y" /* yacc.c:1646 */ + { code1(_ADD_ASG_ARR) ; } +#line 2053 "y.tab.c" /* yacc.c:1646 */ + break; + + case 45: +#line 342 "parse.y" /* yacc.c:1646 */ + { code1(_SUB_ASG_ARR) ; } +#line 2059 "y.tab.c" /* yacc.c:1646 */ + break; + + case 46: 
+#line 343 "parse.y" /* yacc.c:1646 */ + { code1(_MUL_ASG_ARR) ; } +#line 2065 "y.tab.c" /* yacc.c:1646 */ + break; + + case 47: +#line 344 "parse.y" /* yacc.c:1646 */ + { code1(_DIV_ASG_ARR) ; } +#line 2071 "y.tab.c" /* yacc.c:1646 */ + break; + + case 48: +#line 345 "parse.y" /* yacc.c:1646 */ + { code1(_MOD_ASG_ARR) ; } +#line 2077 "y.tab.c" /* yacc.c:1646 */ + break; + + case 49: +#line 346 "parse.y" /* yacc.c:1646 */ + { code1(_POW_ASG_ARR) ; } +#line 2083 "y.tab.c" /* yacc.c:1646 */ + break; + + case 50: +#line 347 "parse.y" /* yacc.c:1646 */ + { code1(_EQ) ; } +#line 2089 "y.tab.c" /* yacc.c:1646 */ + break; + + case 51: +#line 348 "parse.y" /* yacc.c:1646 */ + { code1(_NEQ) ; } +#line 2095 "y.tab.c" /* yacc.c:1646 */ + break; + + case 52: +#line 349 "parse.y" /* yacc.c:1646 */ + { code1(_LT) ; } +#line 2101 "y.tab.c" /* yacc.c:1646 */ + break; + + case 53: +#line 350 "parse.y" /* yacc.c:1646 */ + { code1(_LTE) ; } +#line 2107 "y.tab.c" /* yacc.c:1646 */ + break; + + case 54: +#line 351 "parse.y" /* yacc.c:1646 */ + { code1(_GT) ; } +#line 2113 "y.tab.c" /* yacc.c:1646 */ + break; + + case 55: +#line 352 "parse.y" /* yacc.c:1646 */ + { code1(_GTE) ; } +#line 2119 "y.tab.c" /* yacc.c:1646 */ + break; + + case 56: +#line 355 "parse.y" /* yacc.c:1646 */ + { + INST *p3 = CDP((yyvsp[0].start)) ; + + if ( p3 == mawk_code_ptr - 2 ) + { + if ( p3->op == _MATCH0 ) p3->op = _MATCH1 ; + + else /* check for string */ + if ( p3->op == _PUSHS ) + { + mawk_cell_t *cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ; + + cp->type = C_STRING ; + cp->ptr = p3[1].ptr ; + mawk_cast_to_RE(MAWK, cp) ; + mawk_code_ptr -= 2 ; + code2(MAWK, _MATCH1, cp->ptr) ; + MAWK_ZFREE(MAWK, cp) ; + } + else code1(_MATCH2) ; + } + else code1(_MATCH2) ; + + if ( !(yyvsp[-1].ival) ) code1(_NOT) ; + } +#line 2149 "y.tab.c" /* yacc.c:1646 */ + break; + + case 57: +#line 383 "parse.y" /* yacc.c:1646 */ + { code1(_TEST) ; + mawk_code_jmp(MAWK, _LJNZ, (INST*)0) ; + } +#line 2157 "y.tab.c" /* yacc.c:1646 */ + break; 
+ + case 58: +#line 387 "parse.y" /* yacc.c:1646 */ + { code1(_TEST) ; mawk_patch_jmp(MAWK, mawk_code_ptr) ; } +#line 2163 "y.tab.c" /* yacc.c:1646 */ + break; + + case 59: +#line 390 "parse.y" /* yacc.c:1646 */ + { code1(_TEST) ; + mawk_code_jmp(MAWK, _LJZ, (INST*)0) ; + } +#line 2171 "y.tab.c" /* yacc.c:1646 */ + break; + + case 60: +#line 394 "parse.y" /* yacc.c:1646 */ + { code1(_TEST) ; mawk_patch_jmp(MAWK, mawk_code_ptr) ; } +#line 2177 "y.tab.c" /* yacc.c:1646 */ + break; + + case 61: +#line 396 "parse.y" /* yacc.c:1646 */ + { mawk_code_jmp(MAWK, _JZ, (INST*)0) ; } +#line 2183 "y.tab.c" /* yacc.c:1646 */ + break; + + case 62: +#line 397 "parse.y" /* yacc.c:1646 */ + { mawk_code_jmp(MAWK, _JMP, (INST*)0) ; } +#line 2189 "y.tab.c" /* yacc.c:1646 */ + break; + + case 63: +#line 399 "parse.y" /* yacc.c:1646 */ + { mawk_patch_jmp(MAWK, mawk_code_ptr) ; mawk_patch_jmp(MAWK, CDP((yyvsp[0].start))) ; } +#line 2195 "y.tab.c" /* yacc.c:1646 */ + break; + + case 65: +#line 404 "parse.y" /* yacc.c:1646 */ + { code1(_CAT) ; } +#line 2201 "y.tab.c" /* yacc.c:1646 */ + break; + + case 66: +#line 408 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; code2(MAWK, _PUSHD, (yyvsp[0].ptr)) ; } +#line 2207 "y.tab.c" /* yacc.c:1646 */ + break; + + case 67: +#line 410 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; code2(MAWK, _PUSHS, (yyvsp[0].ptr)) ; } +#line 2213 "y.tab.c" /* yacc.c:1646 */ + break; + + case 68: +#line 412 "parse.y" /* yacc.c:1646 */ + { check_var(MAWK, (yyvsp[0].stp)) ; + (yyval.start) = mawk_code_offset ; + if ( is_local((yyvsp[0].stp)) ) + { mawk_code2op(MAWK, L_PUSHI, (yyvsp[0].stp)->offset) ; } + else code2(MAWK, _PUSHI, (yyvsp[0].stp)->stval.cp) ; + } +#line 2224 "y.tab.c" /* yacc.c:1646 */ + break; + + case 69: +#line 420 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-1].start) ; } +#line 2230 "y.tab.c" /* yacc.c:1646 */ + break; + + case 70: +#line 424 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = 
mawk_code_offset ; code2(MAWK, _MATCH0, (yyvsp[0].ptr)) ; } +#line 2236 "y.tab.c" /* yacc.c:1646 */ + break; + + case 71: +#line 427 "parse.y" /* yacc.c:1646 */ + { code1(_ADD) ; } +#line 2242 "y.tab.c" /* yacc.c:1646 */ + break; + + case 72: +#line 428 "parse.y" /* yacc.c:1646 */ + { code1(_SUB) ; } +#line 2248 "y.tab.c" /* yacc.c:1646 */ + break; + + case 73: +#line 429 "parse.y" /* yacc.c:1646 */ + { code1(_MUL) ; } +#line 2254 "y.tab.c" /* yacc.c:1646 */ + break; + + case 74: +#line 430 "parse.y" /* yacc.c:1646 */ + { code1(_DIV) ; } +#line 2260 "y.tab.c" /* yacc.c:1646 */ + break; + + case 75: +#line 431 "parse.y" /* yacc.c:1646 */ + { code1(_MOD) ; } +#line 2266 "y.tab.c" /* yacc.c:1646 */ + break; + + case 76: +#line 432 "parse.y" /* yacc.c:1646 */ + { code1(_POW) ; } +#line 2272 "y.tab.c" /* yacc.c:1646 */ + break; + + case 77: +#line 434 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[0].start) ; code1(_NOT) ; } +#line 2278 "y.tab.c" /* yacc.c:1646 */ + break; + + case 78: +#line 436 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[0].start) ; code1(_UPLUS) ; } +#line 2284 "y.tab.c" /* yacc.c:1646 */ + break; + + case 79: +#line 438 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[0].start) ; code1(_UMINUS) ; } +#line 2290 "y.tab.c" /* yacc.c:1646 */ + break; + + case 81: +#line 443 "parse.y" /* yacc.c:1646 */ + { check_var(MAWK, (yyvsp[-1].stp)) ; + (yyval.start) = mawk_code_offset ; + mawk_code_address((yyvsp[-1].stp)) ; + + if ( (yyvsp[0].ival) == '+' ) code1(_POST_INC) ; + else code1(_POST_DEC) ; + } +#line 2302 "y.tab.c" /* yacc.c:1646 */ + break; + + case 82: +#line 451 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[0].start) ; + if ( (yyvsp[-1].ival) == '+' ) code1(_PRE_INC) ; + else code1(_PRE_DEC) ; + } +#line 2311 "y.tab.c" /* yacc.c:1646 */ + break; + + case 83: +#line 456 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[0].start) ; + if ( (yyvsp[-1].ival) == '+' ) code1(_PRE_INC_ARR) ; + else code1(_PRE_DEC_ARR) 
; + } +#line 2320 "y.tab.c" /* yacc.c:1646 */ + break; + + case 84: +#line 463 "parse.y" /* yacc.c:1646 */ + { if ((yyvsp[0].ival) == '+' ) code1(F_POST_INC ) ; + else code1(F_POST_DEC) ; + } +#line 2328 "y.tab.c" /* yacc.c:1646 */ + break; + + case 85: +#line 467 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[0].start) ; + if ( (yyvsp[-1].ival) == '+' ) code1(F_PRE_INC) ; + else code1( F_PRE_DEC) ; + } +#line 2337 "y.tab.c" /* yacc.c:1646 */ + break; + + case 86: +#line 474 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; + check_var(MAWK, (yyvsp[0].stp)) ; + mawk_code_address((yyvsp[0].stp)) ; + } +#line 2346 "y.tab.c" /* yacc.c:1646 */ + break; + + case 87: +#line 482 "parse.y" /* yacc.c:1646 */ + { (yyval.ival) = 0 ; } +#line 2352 "y.tab.c" /* yacc.c:1646 */ + break; + + case 89: +#line 487 "parse.y" /* yacc.c:1646 */ + { (yyval.ival) = 1 ; } +#line 2358 "y.tab.c" /* yacc.c:1646 */ + break; + + case 90: +#line 489 "parse.y" /* yacc.c:1646 */ + { (yyval.ival) = (yyvsp[-2].ival) + 1 ; } +#line 2364 "y.tab.c" /* yacc.c:1646 */ + break; + + case 91: +#line 494 "parse.y" /* yacc.c:1646 */ + { const BI_REC *p = (yyvsp[-4].bip) ; + (yyval.start) = (yyvsp[-3].start) ; + if ( (int)p->min_args > (yyvsp[-1].ival) || (int)p->max_args < (yyvsp[-1].ival) ) + mawk_compile_error( + MAWK, "wrong number of arguments in call to %s" , + p->name ) ; + if ( p->min_args != p->max_args ) /* variable args */ + { code1(_PUSHINT) ; code1((yyvsp[-1].ival)) ; } + code2(MAWK, _BUILTIN , mawk_f2d(p->fp)) ; + } +#line 2379 "y.tab.c" /* yacc.c:1646 */ + break; + + case 92: +#line 505 "parse.y" /* yacc.c:1646 */ + { + (yyval.start) = mawk_code_offset ; + code1(_PUSHINT) ; code1(0) ; + code2(MAWK, _BUILTIN, mawk_f2d((yyvsp[0].bip)->fp)) ; + } +#line 2389 "y.tab.c" /* yacc.c:1646 */ + break; + + case 93: +#line 514 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; } +#line 2395 "y.tab.c" /* yacc.c:1646 */ + break; + + case 94: +#line 518 "parse.y" /* 
yacc.c:1646 */ + { code2(MAWK, _PRINT, mawk_f2d((yyvsp[-4].fp))) ; + if ( (yyvsp[-4].fp) == mawk_bi_printf && (yyvsp[-2].ival) == 0 ) + mawk_compile_error(MAWK, "no arguments in call to printf") ; + MAWK->print_flag = 0 ; + (yyval.start) = (yyvsp[-3].start) ; + } +#line 2406 "y.tab.c" /* yacc.c:1646 */ + break; + + case 95: +#line 526 "parse.y" /* yacc.c:1646 */ + { (yyval.fp) = mawk_bi_print ; MAWK->print_flag = 1 ;} +#line 2412 "y.tab.c" /* yacc.c:1646 */ + break; + + case 96: +#line 527 "parse.y" /* yacc.c:1646 */ + { (yyval.fp) = mawk_bi_printf ; MAWK->print_flag = 1 ; } +#line 2418 "y.tab.c" /* yacc.c:1646 */ + break; + + case 97: +#line 530 "parse.y" /* yacc.c:1646 */ + { mawk_code2op(MAWK, _PUSHINT, (yyvsp[0].ival)) ; } +#line 2424 "y.tab.c" /* yacc.c:1646 */ + break; + + case 98: +#line 532 "parse.y" /* yacc.c:1646 */ + { (yyval.ival) = (yyvsp[-1].arg2p)->cnt ; mawk_zfree(MAWK, (yyvsp[-1].arg2p),sizeof(ARG2_REC)) ; + mawk_code2op(MAWK, _PUSHINT, (yyval.ival)) ; + } +#line 2432 "y.tab.c" /* yacc.c:1646 */ + break; + + case 99: +#line 536 "parse.y" /* yacc.c:1646 */ + { (yyval.ival)=0 ; mawk_code2op(MAWK, _PUSHINT, 0) ; } +#line 2438 "y.tab.c" /* yacc.c:1646 */ + break; + + case 100: +#line 540 "parse.y" /* yacc.c:1646 */ + { (yyval.arg2p) = (ARG2_REC*) mawk_zmalloc(MAWK, sizeof(ARG2_REC)) ; + (yyval.arg2p)->start = (yyvsp[-2].start) ; + (yyval.arg2p)->cnt = 2 ; + } +#line 2447 "y.tab.c" /* yacc.c:1646 */ + break; + + case 101: +#line 545 "parse.y" /* yacc.c:1646 */ + { (yyval.arg2p) = (yyvsp[-2].arg2p) ; (yyval.arg2p)->cnt++ ; } +#line 2453 "y.tab.c" /* yacc.c:1646 */ + break; + + case 103: +#line 550 "parse.y" /* yacc.c:1646 */ + { mawk_code2op(MAWK, _PUSHINT, (yyvsp[-1].ival)) ; } +#line 2459 "y.tab.c" /* yacc.c:1646 */ + break; + + case 104: +#line 557 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-1].start) ; mawk_eat_nl(MAWK, &yylval) ; mawk_code_jmp(MAWK, _JZ, (INST*)0) ; } +#line 2465 "y.tab.c" /* yacc.c:1646 */ + break; + + case 105: +#line 
562 "parse.y" /* yacc.c:1646 */ + { mawk_patch_jmp(MAWK, mawk_code_ptr ) ; } +#line 2471 "y.tab.c" /* yacc.c:1646 */ + break; + + case 106: +#line 565 "parse.y" /* yacc.c:1646 */ + { mawk_eat_nl(MAWK, &yylval) ; mawk_code_jmp(MAWK, _JMP, (INST*)0) ; } +#line 2477 "y.tab.c" /* yacc.c:1646 */ + break; + + case 107: +#line 570 "parse.y" /* yacc.c:1646 */ + { mawk_patch_jmp(MAWK, mawk_code_ptr) ; + mawk_patch_jmp(MAWK, CDP((yyvsp[0].start))) ; + } +#line 2485 "y.tab.c" /* yacc.c:1646 */ + break; + + case 108: +#line 578 "parse.y" /* yacc.c:1646 */ + { mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ; } +#line 2491 "y.tab.c" /* yacc.c:1646 */ + break; + + case 109: +#line 583 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-5].start) ; + mawk_code_jmp(MAWK, _JNZ, CDP((yyvsp[-5].start))) ; + mawk_BC_clear(MAWK, mawk_code_ptr, CDP((yyvsp[-2].start))) ; } +#line 2499 "y.tab.c" /* yacc.c:1646 */ + break; + + case 110: +#line 589 "parse.y" /* yacc.c:1646 */ + { mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ; + (yyval.start) = (yyvsp[-1].start) ; + + /* check if const expression */ + if ( mawk_code_ptr - 2 == CDP((yyvsp[-1].start)) && + mawk_code_ptr[-2].op == _PUSHD && + *(double*)mawk_code_ptr[-1].ptr != 0.0 + ) + mawk_code_ptr -= 2 ; + else + { INST *p3 = CDP((yyvsp[-1].start)) ; + mawk_code_push(MAWK, p3, mawk_code_ptr-p3, MAWK->scope, MAWK->active_funct) ; + mawk_code_ptr = p3 ; + code2(MAWK, _JMP, (INST*)0) ; /* code2() not mawk_code_jmp() */ + } + } +#line 2520 "y.tab.c" /* yacc.c:1646 */ + break; + + case 111: +#line 609 "parse.y" /* yacc.c:1646 */ + { + int saved_offset ; + int len ; + INST *p1 = CDP((yyvsp[-1].start)) ; + INST *p2 = CDP((yyvsp[0].start)) ; + + if ( p1 != p2 ) /* real mawk_test in loop */ + { + p1[1].op = mawk_code_ptr-(p1+1) ; + saved_offset = mawk_code_offset ; + len = mawk_code_pop(MAWK, mawk_code_ptr) ; + mawk_code_ptr += len ; + mawk_code_jmp(MAWK, _JNZ, CDP((yyvsp[0].start))) ; + mawk_BC_clear(MAWK, mawk_code_ptr, CDP(saved_offset)) ; + 
} + else /* while(1) */ + { + mawk_code_jmp(MAWK, _JMP, p1) ; + mawk_BC_clear(MAWK, mawk_code_ptr, CDP((yyvsp[0].start))) ; + } + } +#line 2546 "y.tab.c" /* yacc.c:1646 */ + break; + + case 112: +#line 635 "parse.y" /* yacc.c:1646 */ + { + int cont_offset = mawk_code_offset ; + unsigned len = mawk_code_pop(MAWK, mawk_code_ptr) ; + INST *p2 = CDP((yyvsp[-2].start)) ; + INST *p4 = CDP((yyvsp[0].start)) ; + + mawk_code_ptr += len ; + + if ( p2 != p4 ) /* real mawk_test in for2 */ + { + p4[-1].op = mawk_code_ptr - p4 + 1 ; + len = mawk_code_pop(MAWK, mawk_code_ptr) ; + mawk_code_ptr += len ; + mawk_code_jmp(MAWK, _JNZ, CDP((yyvsp[0].start))) ; + } + else /* for(;;) */ + mawk_code_jmp(MAWK, _JMP, p4) ; + + mawk_BC_clear(MAWK, mawk_code_ptr, CDP(cont_offset)) ; + + } +#line 2572 "y.tab.c" /* yacc.c:1646 */ + break; + + case 113: +#line 658 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; } +#line 2578 "y.tab.c" /* yacc.c:1646 */ + break; + + case 114: +#line 660 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-1].start) ; code1(_POP) ; } +#line 2584 "y.tab.c" /* yacc.c:1646 */ + break; + + case 115: +#line 663 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; } +#line 2590 "y.tab.c" /* yacc.c:1646 */ + break; + + case 116: +#line 665 "parse.y" /* yacc.c:1646 */ + { + if ( mawk_code_ptr - 2 == CDP((yyvsp[-1].start)) && + mawk_code_ptr[-2].op == _PUSHD && + * (double*) mawk_code_ptr[-1].ptr != 0.0 + ) + mawk_code_ptr -= 2 ; + else + { + INST *p1 = CDP((yyvsp[-1].start)) ; + mawk_code_push(MAWK, p1, mawk_code_ptr-p1, MAWK->scope, MAWK->active_funct) ; + mawk_code_ptr = p1 ; + code2(MAWK, _JMP, (INST*)0) ; + } + } +#line 2609 "y.tab.c" /* yacc.c:1646 */ + break; + + case 117: +#line 682 "parse.y" /* yacc.c:1646 */ + { mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ; + mawk_code_push(MAWK, (INST*)0,0, MAWK->scope, MAWK->active_funct) ; + } +#line 2617 "y.tab.c" /* yacc.c:1646 */ + break; + + case 118: +#line 686 "parse.y" /* 
yacc.c:1646 */ + { INST *p1 = CDP((yyvsp[-1].start)) ; + + mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ; + code1(_POP) ; + mawk_code_push(MAWK, p1, mawk_code_ptr - p1, MAWK->scope, MAWK->active_funct) ; + mawk_code_ptr -= mawk_code_ptr - p1 ; + } +#line 2629 "y.tab.c" /* yacc.c:1646 */ + break; + + case 119: +#line 699 "parse.y" /* yacc.c:1646 */ + { check_array(MAWK, (yyvsp[0].stp)) ; + mawk_code_array(MAWK, (yyvsp[0].stp)) ; + code1(A_TEST) ; + } +#line 2638 "y.tab.c" /* yacc.c:1646 */ + break; + + case 120: +#line 704 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-3].arg2p)->start ; + mawk_code2op(MAWK, A_CAT, (yyvsp[-3].arg2p)->cnt) ; + mawk_zfree(MAWK, (yyvsp[-3].arg2p), sizeof(ARG2_REC)) ; + + check_array(MAWK, (yyvsp[0].stp)) ; + mawk_code_array(MAWK, (yyvsp[0].stp)) ; + code1(A_TEST) ; + } +#line 2651 "y.tab.c" /* yacc.c:1646 */ + break; + + case 121: +#line 717 "parse.y" /* yacc.c:1646 */ + { + if ( (yyvsp[-1].ival) > 1 ) + { mawk_code2op(MAWK, A_CAT, (yyvsp[-1].ival)) ; } + + check_array(MAWK, (yyvsp[-4].stp)) ; + if( is_local((yyvsp[-4].stp)) ) + { mawk_code2op(MAWK, LAE_PUSHA, (yyvsp[-4].stp)->offset) ; } + else code2(MAWK, AE_PUSHA, (yyvsp[-4].stp)->stval.array) ; + (yyval.start) = (yyvsp[-3].start) ; + } +#line 2666 "y.tab.c" /* yacc.c:1646 */ + break; + + case 122: +#line 730 "parse.y" /* yacc.c:1646 */ + { + if ( (yyvsp[-1].ival) > 1 ) + { mawk_code2op(MAWK, A_CAT, (yyvsp[-1].ival)) ; } + + check_array(MAWK, (yyvsp[-4].stp)) ; + if( is_local((yyvsp[-4].stp)) ) + { mawk_code2op(MAWK, LAE_PUSHA_WRARR, (yyvsp[-4].stp)->offset) ; } + else code2(MAWK, AE_PUSHA_WRARR, (yyvsp[-4].stp)->stval.array) ; + (yyval.start) = (yyvsp[-3].start) ; + } +#line 2681 "y.tab.c" /* yacc.c:1646 */ + break; + + case 123: +#line 743 "parse.y" /* yacc.c:1646 */ + { + if ( (yyvsp[-1].ival) > 1 ) + { mawk_code2op(MAWK, A_CAT, (yyvsp[-1].ival)) ; } + + check_array(MAWK, (yyvsp[-4].stp)) ; + if( is_local((yyvsp[-4].stp)) ) + { mawk_code2op(MAWK, LAE_PUSHI, 
(yyvsp[-4].stp)->offset) ; } + else code2(MAWK, AE_PUSHI, (yyvsp[-4].stp)->stval.array) ; + (yyval.start) = (yyvsp[-3].start) ; + } +#line 2696 "y.tab.c" /* yacc.c:1646 */ + break; + + case 124: +#line 755 "parse.y" /* yacc.c:1646 */ + { + if ( (yyvsp[-2].ival) > 1 ) + { mawk_code2op(MAWK, A_CAT,(yyvsp[-2].ival)) ; } + + check_array(MAWK, (yyvsp[-5].stp)) ; + if( is_local((yyvsp[-5].stp)) ) + { mawk_code2op(MAWK, LAE_PUSHA_WRARR, (yyvsp[-5].stp)->offset) ; } + else code2(MAWK, AE_PUSHA_WRARR, (yyvsp[-5].stp)->stval.array) ; + if ( (yyvsp[0].ival) == '+' ) code1(_POST_INC_ARR) ; + else code1(_POST_DEC_ARR) ; + + (yyval.start) = (yyvsp[-4].start) ; + } +#line 2714 "y.tab.c" /* yacc.c:1646 */ + break; + + case 125: +#line 772 "parse.y" /* yacc.c:1646 */ + { + (yyval.start) = (yyvsp[-4].start) ; + if ( (yyvsp[-2].ival) > 1 ) { mawk_code2op(MAWK, A_CAT, (yyvsp[-2].ival)) ; } + check_array(MAWK, (yyvsp[-5].stp)) ; + mawk_code_array(MAWK, (yyvsp[-5].stp)) ; + code1(A_DEL) ; + } +#line 2726 "y.tab.c" /* yacc.c:1646 */ + break; + + case 126: +#line 780 "parse.y" /* yacc.c:1646 */ + { + (yyval.start) = mawk_code_offset ; + check_array(MAWK, (yyvsp[-1].stp)) ; + mawk_code_array(MAWK, (yyvsp[-1].stp)) ; + code1(DEL_A) ; + } +#line 2737 "y.tab.c" /* yacc.c:1646 */ + break; + + case 127: +#line 791 "parse.y" /* yacc.c:1646 */ + { mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ; + (yyval.start) = mawk_code_offset ; + + check_var(MAWK, (yyvsp[-3].stp)) ; + mawk_code_address((yyvsp[-3].stp)) ; + check_array(MAWK, (yyvsp[-1].stp)) ; + mawk_code_array(MAWK, (yyvsp[-1].stp)) ; + + code2(MAWK, SET_ALOOP, (INST*)0) ; + } +#line 2752 "y.tab.c" /* yacc.c:1646 */ + break; + + case 128: +#line 805 "parse.y" /* yacc.c:1646 */ + { + INST *p2 = CDP((yyvsp[0].start)) ; + + p2[-1].op = mawk_code_ptr - p2 + 1 ; + mawk_BC_clear(MAWK, mawk_code_ptr+2 , mawk_code_ptr) ; + mawk_code_jmp(MAWK, ALOOP, p2) ; + code1(POP_AL) ; + } +#line 2765 "y.tab.c" /* yacc.c:1646 */ + break; + + case 129: +#line 822 
"parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; code2(MAWK, F_PUSHA, (yyvsp[0].cp)) ; } +#line 2771 "y.tab.c" /* yacc.c:1646 */ + break; + + case 130: +#line 824 "parse.y" /* yacc.c:1646 */ + { check_var(MAWK, (yyvsp[0].stp)) ; + (yyval.start) = mawk_code_offset ; + if ( is_local((yyvsp[0].stp)) ) + { mawk_code2op(MAWK, L_PUSHI, (yyvsp[0].stp)->offset) ; } + else code2(MAWK, _PUSHI, (yyvsp[0].stp)->stval.cp) ; + + CODE_FE_PUSHA() ; + } +#line 2784 "y.tab.c" /* yacc.c:1646 */ + break; + + case 131: +#line 833 "parse.y" /* yacc.c:1646 */ + { + if ( (yyvsp[-1].ival) > 1 ) + { mawk_code2op(MAWK, A_CAT, (yyvsp[-1].ival)) ; } + + check_array(MAWK, (yyvsp[-4].stp)) ; + if( is_local((yyvsp[-4].stp)) ) + { mawk_code2op(MAWK, LAE_PUSHI, (yyvsp[-4].stp)->offset) ; } + else code2(MAWK, AE_PUSHI, (yyvsp[-4].stp)->stval.array) ; + + CODE_FE_PUSHA() ; + + (yyval.start) = (yyvsp[-3].start) ; + } +#line 2802 "y.tab.c" /* yacc.c:1646 */ + break; + + case 132: +#line 847 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[0].start) ; CODE_FE_PUSHA() ; } +#line 2808 "y.tab.c" /* yacc.c:1646 */ + break; + + case 133: +#line 849 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-1].start) ; } +#line 2814 "y.tab.c" /* yacc.c:1646 */ + break; + + case 134: +#line 853 "parse.y" /* yacc.c:1646 */ + { field_A2I(MAWK) ; } +#line 2820 "y.tab.c" /* yacc.c:1646 */ + break; + + case 135: +#line 856 "parse.y" /* yacc.c:1646 */ + { code1(F_ASSIGN) ; } +#line 2826 "y.tab.c" /* yacc.c:1646 */ + break; + + case 136: +#line 857 "parse.y" /* yacc.c:1646 */ + { code1(F_ADD_ASG) ; } +#line 2832 "y.tab.c" /* yacc.c:1646 */ + break; + + case 137: +#line 858 "parse.y" /* yacc.c:1646 */ + { code1(F_SUB_ASG) ; } +#line 2838 "y.tab.c" /* yacc.c:1646 */ + break; + + case 138: +#line 859 "parse.y" /* yacc.c:1646 */ + { code1(F_MUL_ASG) ; } +#line 2844 "y.tab.c" /* yacc.c:1646 */ + break; + + case 139: +#line 860 "parse.y" /* yacc.c:1646 */ + { code1(F_DIV_ASG) ; } +#line 2850 "y.tab.c" 
/* yacc.c:1646 */ + break; + + case 140: +#line 861 "parse.y" /* yacc.c:1646 */ + { code1(F_MOD_ASG) ; } +#line 2856 "y.tab.c" /* yacc.c:1646 */ + break; + + case 141: +#line 862 "parse.y" /* yacc.c:1646 */ + { code1(F_POW_ASG) ; } +#line 2862 "y.tab.c" /* yacc.c:1646 */ + break; + + case 142: +#line 869 "parse.y" /* yacc.c:1646 */ + { code2(MAWK, _BUILTIN, mawk_f2d(mawk_bi_split)) ; } +#line 2868 "y.tab.c" /* yacc.c:1646 */ + break; + + case 143: +#line 873 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-2].start) ; + check_array(MAWK, (yyvsp[0].stp)) ; + mawk_code_array(MAWK, (yyvsp[0].stp)) ; + } +#line 2877 "y.tab.c" /* yacc.c:1646 */ + break; + + case 144: +#line 880 "parse.y" /* yacc.c:1646 */ + { code2(MAWK, _PUSHI, &MAWK->fs_shadow) ; } +#line 2883 "y.tab.c" /* yacc.c:1646 */ + break; + + case 145: +#line 882 "parse.y" /* yacc.c:1646 */ + { + if ( CDP((yyvsp[-1].start)) == mawk_code_ptr - 2 ) + { + if ( mawk_code_ptr[-2].op == _MATCH0 ) + RE_as_arg(MAWK) ; + else + if ( mawk_code_ptr[-2].op == _PUSHS ) + { mawk_cell_t *cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ; + + cp->type = C_STRING ; + cp->ptr = mawk_code_ptr[-1].ptr ; + mawk_cast_for_split(MAWK, cp) ; + mawk_code_ptr[-2].op = _PUSHC ; + mawk_code_ptr[-1].ptr = (PTR) cp ; + } + } + } +#line 2905 "y.tab.c" /* yacc.c:1646 */ + break; + + case 146: +#line 906 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-3].start) ; + code2(MAWK, _BUILTIN, mawk_f2d(mawk_bi_match)) ; + } +#line 2913 "y.tab.c" /* yacc.c:1646 */ + break; + + case 147: +#line 913 "parse.y" /* yacc.c:1646 */ + { + INST *p1 = CDP((yyvsp[0].start)) ; + + if ( p1 == mawk_code_ptr - 2 ) + { + if ( p1->op == _MATCH0 ) RE_as_arg(MAWK) ; + else + if ( p1->op == _PUSHS ) + { mawk_cell_t *cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ; + + cp->type = C_STRING ; + cp->ptr = p1[1].ptr ; + mawk_cast_to_RE(MAWK, cp) ; + p1->op = _PUSHC ; + p1[1].ptr = (PTR) cp ; + } + } + } +#line 2936 "y.tab.c" /* yacc.c:1646 */ + break; + + case 148: +#line 936 
"parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; + code1(_EXIT0) ; } +#line 2943 "y.tab.c" /* yacc.c:1646 */ + break; + + case 149: +#line 939 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-1].start) ; code1(_EXIT) ; } +#line 2949 "y.tab.c" /* yacc.c:1646 */ + break; + + case 150: +#line 942 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; + code1(_RET0) ; } +#line 2956 "y.tab.c" /* yacc.c:1646 */ + break; + + case 151: +#line 945 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-1].start) ; code1(_RET) ; } +#line 2962 "y.tab.c" /* yacc.c:1646 */ + break; + + case 152: +#line 950 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; + code2(MAWK, F_PUSHA, &MAWK->field[0]) ; + code1(_PUSHINT) ; code1(0) ; + code2(MAWK, _BUILTIN, mawk_f2d(mawk_bi_getline)) ; + MAWK->getline_flag = 0 ; + } +#line 2973 "y.tab.c" /* yacc.c:1646 */ + break; + + case 153: +#line 957 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[0].start) ; + code1(_PUSHINT) ; code1(0) ; + code2(MAWK, _BUILTIN, mawk_f2d(mawk_bi_getline)) ; + MAWK->getline_flag = 0 ; + } +#line 2983 "y.tab.c" /* yacc.c:1646 */ + break; + + case 154: +#line 963 "parse.y" /* yacc.c:1646 */ + { code1(_PUSHINT) ; code1(F_IN) ; + code2(MAWK, _BUILTIN, mawk_f2d(mawk_bi_getline)) ; + /* getline_flag already off in yylex() */ + } +#line 2992 "y.tab.c" /* yacc.c:1646 */ + break; + + case 155: +#line 968 "parse.y" /* yacc.c:1646 */ + { code2(MAWK, F_PUSHA, &MAWK->field[0]) ; + code1(_PUSHINT) ; code1(PIPE_IN) ; + code2(MAWK, _BUILTIN, mawk_f2d(mawk_bi_getline)) ; + } +#line 3001 "y.tab.c" /* yacc.c:1646 */ + break; + + case 156: +#line 973 "parse.y" /* yacc.c:1646 */ + { + code1(_PUSHINT) ; code1(PIPE_IN) ; + code2(MAWK, _BUILTIN, mawk_f2d(mawk_bi_getline)) ; + } +#line 3010 "y.tab.c" /* yacc.c:1646 */ + break; + + case 157: +#line 979 "parse.y" /* yacc.c:1646 */ + { MAWK->getline_flag = 1 ; } +#line 3016 "y.tab.c" /* yacc.c:1646 */ + break; + + case 161: +#line 
986 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; + code2(MAWK, F_PUSHA, MAWK->field+0) ; + } +#line 3024 "y.tab.c" /* yacc.c:1646 */ + break; + + case 162: +#line 990 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-1].start) ; } +#line 3030 "y.tab.c" /* yacc.c:1646 */ + break; + + case 163: +#line 998 "parse.y" /* yacc.c:1646 */ + { + INST *p5 = CDP((yyvsp[-1].start)) ; + INST *p6 = CDP((yyvsp[0].start)) ; + + if ( p6 - p5 == 2 && p5->op == _PUSHS ) + { /* cast from STRING to REPL at compile time */ + mawk_cell_t *cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ; + cp->type = C_STRING ; + cp->ptr = p5[1].ptr ; + mawk_cast_to_REPL(MAWK, cp) ; + p5->op = _PUSHC ; + p5[1].ptr = (PTR) cp ; + } + code2(MAWK, _BUILTIN, mawk_f2d((yyvsp[-5].fp))) ; + (yyval.start) = (yyvsp[-3].start) ; + } +#line 3051 "y.tab.c" /* yacc.c:1646 */ + break; + + case 164: +#line 1016 "parse.y" /* yacc.c:1646 */ + { (yyval.fp) = mawk_bi_sub ; } +#line 3057 "y.tab.c" /* yacc.c:1646 */ + break; + + case 165: +#line 1017 "parse.y" /* yacc.c:1646 */ + { (yyval.fp) = mawk_bi_gsub ; } +#line 3063 "y.tab.c" /* yacc.c:1646 */ + break; + + case 166: +#line 1022 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = mawk_code_offset ; + code2(MAWK, F_PUSHA, &MAWK->field[0]) ; + } +#line 3071 "y.tab.c" /* yacc.c:1646 */ + break; + + case 167: +#line 1027 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-1].start) ; } +#line 3077 "y.tab.c" /* yacc.c:1646 */ + break; + + case 168: +#line 1035 "parse.y" /* yacc.c:1646 */ + { + resize_fblock(MAWK, (yyvsp[-1].fbp)) ; + mawk_restore_ids(MAWK) ; + switch_code_to_main(MAWK) ; + } +#line 3087 "y.tab.c" /* yacc.c:1646 */ + break; + + case 169: +#line 1043 "parse.y" /* yacc.c:1646 */ + { mawk_eat_nl(MAWK, &yylval) ; + MAWK->scope = SCOPE_FUNCT ; + MAWK->active_funct = (yyvsp[-3].fbp) ; + *MAWK->main_code_p = MAWK->active_code ; + + (yyvsp[-3].fbp)->nargs = (yyvsp[-1].ival) ; + if ( (yyvsp[-1].ival) ) + (yyvsp[-3].fbp)->typev = (char *) + memset( 
mawk_zmalloc(MAWK, (yyvsp[-1].ival)), ST_LOCAL_NONE, (yyvsp[-1].ival)) ; + else (yyvsp[-3].fbp)->typev = (char *) 0 ; + + mawk_code_ptr = mawk_code_base = + (INST *) mawk_zmalloc(MAWK, INST_BYTES(PAGESZ)); + mawk_code_limit = mawk_code_base + PAGESZ ; + mawk_code_warn = mawk_code_limit - CODEWARN ; + } +#line 3108 "y.tab.c" /* yacc.c:1646 */ + break; + + case 170: +#line 1062 "parse.y" /* yacc.c:1646 */ + { FBLOCK *fbp ; + + if ( (yyvsp[0].stp)->type == ST_NONE ) + { + (yyvsp[0].stp)->type = ST_FUNCT ; + fbp = (yyvsp[0].stp)->stval.fbp = + (FBLOCK *) mawk_zmalloc(MAWK, sizeof(FBLOCK)) ; + fbp->name = (yyvsp[0].stp)->name ; + fbp->code = (INST*) 0 ; + } + else + { + mawk_type_error(MAWK, (yyvsp[0].stp) ) ; + + /* this FBLOCK will not be put in + the symbol table */ + fbp = (FBLOCK*) mawk_zmalloc(MAWK, sizeof(FBLOCK)) ; + fbp->name = "" ; + } + (yyval.fbp) = fbp ; + } +#line 3134 "y.tab.c" /* yacc.c:1646 */ + break; + + case 171: +#line 1085 "parse.y" /* yacc.c:1646 */ + { (yyval.fbp) = (yyvsp[0].fbp) ; + if ( (yyvsp[0].fbp)->code ) + mawk_compile_error(MAWK, "redefinition of %s" , (yyvsp[0].fbp)->name) ; + } +#line 3143 "y.tab.c" /* yacc.c:1646 */ + break; + + case 172: +#line 1091 "parse.y" /* yacc.c:1646 */ + { (yyval.ival) = 0 ; } +#line 3149 "y.tab.c" /* yacc.c:1646 */ + break; + + case 174: +#line 1096 "parse.y" /* yacc.c:1646 */ + { (yyvsp[0].stp) = mawk_save_id(MAWK, (yyvsp[0].stp)->name) ; + (yyvsp[0].stp)->type = ST_LOCAL_NONE ; + (yyvsp[0].stp)->offset = 0 ; + (yyval.ival) = 1 ; + } +#line 3159 "y.tab.c" /* yacc.c:1646 */ + break; + + case 175: +#line 1102 "parse.y" /* yacc.c:1646 */ + { if ( is_local((yyvsp[0].stp)) ) + mawk_compile_error(MAWK, "%s is duplicated in argument list", + (yyvsp[0].stp)->name) ; + else + { (yyvsp[0].stp) = mawk_save_id(MAWK, (yyvsp[0].stp)->name) ; + (yyvsp[0].stp)->type = ST_LOCAL_NONE ; + (yyvsp[0].stp)->offset = (yyvsp[-2].ival) ; + (yyval.ival) = (yyvsp[-2].ival) + 1 ; + } + } +#line 3174 "y.tab.c" /* yacc.c:1646 */ + 
break; + + case 176: +#line 1115 "parse.y" /* yacc.c:1646 */ + { /* we may have to recover from a bungled function + definition */ + /* can have local ids, before code scope + changes */ + mawk_restore_ids(MAWK) ; + + switch_code_to_main(MAWK) ; + } +#line 3187 "y.tab.c" /* yacc.c:1646 */ + break; + + case 177: +#line 1127 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-1].start) ; + code2(MAWK, _CALL, (yyvsp[-2].fbp)) ; + + if ( (yyvsp[0].ca_p) ) code1((yyvsp[0].ca_p)->arg_num+1) ; + else code1(0) ; + + mawk_check_fcall(MAWK, (yyvsp[-2].fbp), MAWK->scope, MAWK->ps.code_move_level, MAWK->active_funct, + (yyvsp[0].ca_p), MAWK->token_lineno) ; + } +#line 3201 "y.tab.c" /* yacc.c:1646 */ + break; + + case 178: +#line 1139 "parse.y" /* yacc.c:1646 */ + { (yyval.start) = (yyvsp[-1].start) ; + code2(MAWK, _CALL, (yyvsp[-2].ptr)) ; + + if ( (yyvsp[0].ca_p) ) code1((yyvsp[0].ca_p)->arg_num+1) ; + else code1(0) ; + } +#line 3212 "y.tab.c" /* yacc.c:1646 */ + break; + + case 179: +#line 1148 "parse.y" /* yacc.c:1646 */ + { (yyval.ca_p) = (CA_REC *) 0 ; } +#line 3218 "y.tab.c" /* yacc.c:1646 */ + break; + + case 180: +#line 1150 "parse.y" /* yacc.c:1646 */ + { (yyval.ca_p) = (yyvsp[0].ca_p) ; + (yyval.ca_p)->link = (yyvsp[-1].ca_p) ; + (yyval.ca_p)->arg_num = (yyvsp[-1].ca_p) ? (yyvsp[-1].ca_p)->arg_num+1 : 0 ; + } +#line 3227 "y.tab.c" /* yacc.c:1646 */ + break; + + case 181: +#line 1165 "parse.y" /* yacc.c:1646 */ + { (yyval.ca_p) = (CA_REC *) 0 ; } +#line 3233 "y.tab.c" /* yacc.c:1646 */ + break; + + case 182: +#line 1167 "parse.y" /* yacc.c:1646 */ + { (yyval.ca_p) = MAWK_ZMALLOC(MAWK, CA_REC) ; + (yyval.ca_p)->link = (yyvsp[-2].ca_p) ; + (yyval.ca_p)->type = CA_EXPR ; + (yyval.ca_p)->arg_num = (yyvsp[-2].ca_p) ? 
(yyvsp[-2].ca_p)->arg_num+1 : 0 ; + (yyval.ca_p)->call_offset = mawk_code_offset ; + } +#line 3244 "y.tab.c" /* yacc.c:1646 */ + break; + + case 183: +#line 1174 "parse.y" /* yacc.c:1646 */ + { (yyval.ca_p) = MAWK_ZMALLOC(MAWK, CA_REC) ; + (yyval.ca_p)->link = (yyvsp[-2].ca_p) ; + (yyval.ca_p)->arg_num = (yyvsp[-2].ca_p) ? (yyvsp[-2].ca_p)->arg_num+1 : 0 ; + + mawk_code_call_id(MAWK, (yyval.ca_p), (yyvsp[-1].stp)) ; + } +#line 3255 "y.tab.c" /* yacc.c:1646 */ + break; + + case 184: +#line 1183 "parse.y" /* yacc.c:1646 */ + { (yyval.ca_p) = MAWK_ZMALLOC(MAWK, CA_REC) ; + (yyval.ca_p)->type = CA_EXPR ; + (yyval.ca_p)->call_offset = mawk_code_offset ; + } +#line 3264 "y.tab.c" /* yacc.c:1646 */ + break; + + case 185: +#line 1189 "parse.y" /* yacc.c:1646 */ + { (yyval.ca_p) = MAWK_ZMALLOC(MAWK, CA_REC) ; + mawk_code_call_id(MAWK, (yyval.ca_p), (yyvsp[-1].stp)) ; + } +#line 3272 "y.tab.c" /* yacc.c:1646 */ + break; + + case 186: +#line 1196 "parse.y" /* yacc.c:1646 */ + { mawk_parser_include(MAWK, (yyvsp[0].ptr)); } +#line 3278 "y.tab.c" /* yacc.c:1646 */ + break; + + +#line 3282 "y.tab.c" /* yacc.c:1646 */ + default: break; + } + /* User semantic actions sometimes alter yychar, and that requires + that yytoken be updated with the new translation. We take the + approach of translating immediately before every use of yytoken. + One alternative is translating here after every semantic action, + but that translation would be missed if the semantic action invokes + YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or + if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an + incorrect destructor might then be invoked immediately. In the + case of YYERROR or YYBACKUP, subsequent parser actions might lead + to an incorrect destructor call or verbose syntax error message + before the lookahead is translated. 
*/ + YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); + + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + + *++yyvsp = yyval; + + /* Now 'shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. */ + + yyn = yyr1[yyn]; + + yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; + if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) + yystate = yytable[yystate]; + else + yystate = yydefgoto[yyn - YYNTOKENS]; + + goto yynewstate; + + +/*--------------------------------------. +| yyerrlab -- here on detecting error. | +`--------------------------------------*/ +yyerrlab: + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar); + + /* If not already recovering from an error, report this error. */ + if (!yyerrstatus) + { + ++yynerrs; +#if ! YYERROR_VERBOSE + yyerror (MAWK, YY_("syntax error")); +#else +# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \ + yyssp, yytoken) + { + char const *yymsgp = YY_("syntax error"); + int yysyntax_error_status; + yysyntax_error_status = YYSYNTAX_ERROR; + if (yysyntax_error_status == 0) + yymsgp = yymsg; + else if (yysyntax_error_status == 1) + { + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); + yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc); + if (!yymsg) + { + yymsg = yymsgbuf; + yymsg_alloc = sizeof yymsgbuf; + yysyntax_error_status = 2; + } + else + { + yysyntax_error_status = YYSYNTAX_ERROR; + yymsgp = yymsg; + } + } + yyerror (MAWK, yymsgp); + if (yysyntax_error_status == 2) + goto yyexhaustedlab; + } +# undef YYSYNTAX_ERROR +#endif + } + + + + if (yyerrstatus == 3) + { + /* If just tried and failed to reuse lookahead token after an + error, discard it. */ + + if (yychar <= YYEOF) + { + /* Return failure if at end of input. 
*/ + if (yychar == YYEOF) + YYABORT; + } + else + { + yydestruct ("Error: discarding", + yytoken, &yylval, MAWK); + yychar = YYEMPTY; + } + } + + /* Else will try to reuse lookahead token after shifting the error + token. */ + goto yyerrlab1; + + +/*---------------------------------------------------. +| yyerrorlab -- error raised explicitly by YYERROR. | +`---------------------------------------------------*/ +yyerrorlab: + + /* Pacify compilers like GCC when the user code never invokes + YYERROR and the label yyerrorlab therefore never appears in user + code. */ + if (/*CONSTCOND*/ 0) + goto yyerrorlab; + + /* Do not reclaim the symbols of the rule whose action triggered + this YYERROR. */ + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + yystate = *yyssp; + goto yyerrlab1; + + +/*-------------------------------------------------------------. +| yyerrlab1 -- common code for both syntax error and YYERROR. | +`-------------------------------------------------------------*/ +yyerrlab1: + yyerrstatus = 3; /* Each real token shifted decrements this. */ + + for (;;) + { + yyn = yypact[yystate]; + if (!yypact_value_is_default (yyn)) + { + yyn += YYTERROR; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } + + /* Pop the current state because it cannot handle the error token. */ + if (yyssp == yyss) + YYABORT; + + + yydestruct ("Error: popping", + yystos[yystate], yyvsp, MAWK); + YYPOPSTACK (1); + yystate = *yyssp; + YY_STACK_PRINT (yyss, yyssp); + } + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + + /* Shift the error token. */ + YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); + + yystate = yyn; + goto yynewstate; + + +/*-------------------------------------. +| yyacceptlab -- YYACCEPT comes here. 
| +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturn; + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturn; + +#if !defined yyoverflow || YYERROR_VERBOSE +/*-------------------------------------------------. +| yyexhaustedlab -- memory exhaustion comes here. | +`-------------------------------------------------*/ +yyexhaustedlab: + yyerror (MAWK, YY_("memory exhausted")); + yyresult = 2; + /* Fall through. */ +#endif + +yyreturn: + if (yychar != YYEMPTY) + { + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = YYTRANSLATE (yychar); + yydestruct ("Cleanup: discarding lookahead", + yytoken, &yylval, MAWK); + } + /* Do not reclaim the symbols of the rule whose action triggered + this YYABORT or YYACCEPT. */ + YYPOPSTACK (yylen); + YY_STACK_PRINT (yyss, yyssp); + while (yyssp != yyss) + { + yydestruct ("Cleanup: popping", + yystos[*yyssp], yyvsp, MAWK); + YYPOPSTACK (1); + } +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif +#if YYERROR_VERBOSE + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); +#endif + return yyresult; +} +#line 1204 "parse.y" /* yacc.c:1906 */ + + +/* resize the code for a user function: append _RET0/_HALT, shrink the active code block into fbp->code (size stored in fbp->size) and register fbp on the fdump list */ + +static void resize_fblock(mawk_state_t *MAWK, FBLOCK *fbp) +{ + CODEBLOCK *p = MAWK_ZMALLOC(MAWK, CODEBLOCK) ; + + mawk_code2op(MAWK, _RET0, _HALT) ; + /* make sure there is always a return */ + + *p = MAWK->active_code ; + fbp->code = mawk_code_shrink(MAWK, p, &fbp->size) ; + /* mawk_code_shrink() zfrees p */ + +/* this list is also used to free functions in pedantic mode, so with MAWK_MEM_PEDANTIC defined the add below is unconditional */ +#ifndef MAWK_MEM_PEDANTIC + if ( MAWK->dump_code_flag ) +#endif + mawk_add_to_fdump_list(MAWK, fbp) ; +/* printf("CODE add: %p/%d\n", fbp->code, fbp->size);*/ +} + + +/* convert FE_PUSHA to FE_PUSHI + or F_PUSH to F_PUSHI +*/ +static void 
field_A2I(mawk_state_t *MAWK) +{ + mawk_cell_t *cp; + + if ( mawk_code_ptr[-1].op == FE_PUSHA && + mawk_code_ptr[-1].ptr == (PTR) 0) + /* On most architectures, the two tests are the same; a good + compiler might eliminate one. On LM_DOS, and possibly other + segmented architectures, they are not */ + { mawk_code_ptr[-1].op = FE_PUSHI ; } + else + { + cp = (mawk_cell_t *) mawk_code_ptr[-1].ptr ; /* operand of the preceding push: address of the field cell */ + + if (cp == MAWK->field || (cp > MAWK_NF && cp <= LAST_PFIELD)) + { + mawk_code_ptr[-2].op = _PUSHI ; + } + else if ( cp == MAWK_NF ) + { mawk_code_ptr[-2].op = NF_PUSHI ; mawk_code_ptr-- ; } /* operand slot is dropped for NF_PUSHI */ + + else + { + mawk_code_ptr[-2].op = F_PUSHI ; + mawk_code_ptr -> op = mawk_field_addr_to_index(MAWK, mawk_code_ptr[-1].ptr ) ; + mawk_code_ptr++ ; /* F_PUSHI keeps the address and gains one extra slot holding the field index */ + } + } +} + +/* we've seen an ID in a context where it should be a VAR, + check that's consistent with previous usage */ +static void check_var(mawk_state_t *MAWK, register SYMTAB *p) +{ + switch(p->type) + { + case ST_NONE : /* new id */ + p->type = ST_VAR ; + p->stval.cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ; + p->stval.cp->type = C_NOINIT ; + break ; + + case ST_LOCAL_NONE : /* untyped local: type it here and in the active function's typev[] */ + p->type = ST_LOCAL_VAR ; + MAWK->active_funct->typev[p->offset] = ST_LOCAL_VAR ; + break ; + + case ST_VAR : + case ST_LOCAL_VAR : break ; /* already a variable: nothing to do */ + + default : /* any other previous type is handed to mawk_type_error() */ + mawk_type_error(MAWK, p) ; + break ; + } +} + +/* we've seen an ID in a context where it should be an ARRAY, + check that's consistent with previous usage */ +static void check_array(mawk_state_t *MAWK, register SYMTAB *p) +{ + switch(p->type) + { + case ST_NONE : /* a new array */ + p->type = ST_ARRAY ; + p->stval.array = mawk_array_new(MAWK, NULL) ; + break ; + + case ST_ARRAY : + case ST_LOCAL_ARRAY : /* already an array: nothing to do */ + break ; + + case ST_LOCAL_NONE : /* untyped local: type it here and in the active function's typev[] */ + p->type = ST_LOCAL_ARRAY ; + MAWK->active_funct->typev[p->offset] = ST_LOCAL_ARRAY ; + break ; + + default : mawk_type_error(MAWK, p) ; break ; + } +} + +static void mawk_code_array(mawk_state_t *MAWK, register SYMTAB *p) +{ + if ( is_local(p) ) mawk_code2op(MAWK, 
LA_PUSHA, p->offset) ; + else code2(MAWK, A_PUSHA, p->stval.array) ; +} + + +/* we've seen an ID as an argument to a user defined function */ +static void mawk_code_call_id(mawk_state_t *MAWK, register CA_REC *p, register SYMTAB *ip) +{ + p->call_offset = mawk_code_offset ; + /* This is always set now, so that fcall:relocate_arglist + works. */ + + switch( ip->type ) + { + case ST_VAR : + p->type = CA_EXPR ; + code2(MAWK, _PUSHI, ip->stval.cp) ; + break ; + + case ST_LOCAL_VAR : + p->type = CA_EXPR ; + mawk_code2op(MAWK, L_PUSHI, ip->offset) ; + break ; + + case ST_ARRAY : + p->type = CA_ARRAY ; + code2(MAWK, A_PUSHA, ip->stval.array) ; + break ; + + case ST_LOCAL_ARRAY : + p->type = CA_ARRAY ; + mawk_code2op(MAWK, LA_PUSHA, ip->offset) ; + break ; + + /* not enough info to code it now; it will have to + be patched later */ + + case ST_NONE : + p->type = ST_NONE ; + p->sym_p = ip ; /* keep the symbol so the type can be looked up later */ + code2(MAWK, _PUSHI, &MAWK->code_call_id_dummy) ; /* placeholder operand */ + break ; + + case ST_LOCAL_NONE : + p->type = ST_LOCAL_NONE ; + p->type_p = & MAWK->active_funct->typev[ip->offset] ; /* points at the local's type slot in the active function */ + mawk_code2op(MAWK, L_PUSHI, ip->offset) ; + break ; + +#ifdef DEBUG + default : + mawk_bozo(MAWK, "mawk_code_call_id") ; +#endif + + } +} + +/* an RE by itself was coded as _MATCH0 , change to + push as an expression */ + +static void RE_as_arg(mawk_state_t *MAWK) +{ + mawk_cell_t *cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ; + + mawk_code_ptr -= 2 ; /* back up over the two slots just coded */ + cp->type = C_RE ; + cp->ptr = mawk_code_ptr[1].ptr ; /* reuse the operand of the removed instruction */ + code2(MAWK, _PUSHC, cp) ; +} + +/* reset the active_code back to the MAIN block, storing the BEGIN/END code back first */ +static void switch_code_to_main(mawk_state_t *MAWK) +{ + switch(MAWK->scope) + { + case SCOPE_BEGIN : + *MAWK->begin_code_p = MAWK->active_code ; + MAWK->active_code = *MAWK->main_code_p ; + break ; + + case SCOPE_END : + *MAWK->end_code_p = MAWK->active_code ; + MAWK->active_code = *MAWK->main_code_p ; + break ; + + case SCOPE_FUNCT : + MAWK->active_code = *MAWK->main_code_p ; + break ; + + case SCOPE_MAIN : + break ; + } + 
MAWK->active_funct = (FBLOCK*) 0 ; + MAWK->scope = SCOPE_MAIN ; +} + +/* run yyparse() unless binary_loaded is set, then act on the dump flags; calls mawk_exit(MAWK, 2) when yyparse() returns non-zero or compile errors were counted */ +void mawk_parse(mawk_state_t *MAWK) +{ + if (!MAWK->binary_loaded) { + if ( yyparse(MAWK) || MAWK->compile_error_count != 0 ) mawk_exit(MAWK, 2) ; + + mawk_scan_cleanup(MAWK) ; + mawk_set_code(MAWK) ; + /* code must be set before call to mawk_resolve_fcalls() */ + if ( MAWK->resolve_list ) mawk_resolve_fcalls(MAWK) ; + } + + if ( MAWK->compile_error_count != 0 ) mawk_exit(MAWK, 2) ; /* re-checked after mawk_resolve_fcalls(); also reached when binary_loaded is set */ + if ( MAWK->dump_code_flag ) { mawk_dump_code(MAWK);} + if ( MAWK->dump_sym_flag ) { mawk_dump_sym_text(MAWK); } + if ((MAWK->dump_code_flag ) || ( MAWK->dump_sym_flag )) { mawk_exit(MAWK, 0); } /* after either dump, mawk_exit(MAWK, 0) is called */ + + (void)mawk_d2f(NULL); /* suppress compiler warning */ +} + +/* start scanning an included file: push the parser state, point it at the file named by str (read as a mawk_string_t) with a fresh buffer, and pop the state again if mawk_scan_open() does not return 1 */ +void mawk_parser_include(mawk_state_t *MAWK, void *str) +{ + mawk_parser_push(MAWK); + + MAWK->ps.eof_flag = 0 ; + MAWK->ps.pfile_name = ((mawk_string_t *)str)->str; + MAWK->ps.buffp = MAWK->ps.buffer = (unsigned char *) mawk_zmalloc(MAWK, BUFFSZ + 1) ; + *MAWK->ps.buffp = '\0'; /* buffer starts out empty */ + if (mawk_scan_open(MAWK) == 1) + MAWK->token_lineno = MAWK->lineno = 1 ; + else + mawk_parser_pop(MAWK); +} diff --git a/src/libmawk/parse.h b/src/libmawk/parse.h new file mode 100644 index 0000000..2e99737 --- /dev/null +++ b/src/libmawk/parse.h @@ -0,0 +1,232 @@ +/* A Bison parser, made by GNU Bison 3.0.2. */ + +/* Bison interface for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2013 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +#ifndef YY_MAWK_Y_TAB_H_INCLUDED +# define YY_MAWK_Y_TAB_H_INCLUDED +/* Debug traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif +#if YYDEBUG +extern int Mawk_debug; +#endif + +/* Token type. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { + UNEXPECTED = 258, + BAD_DECIMAL = 259, + NL = 260, + SEMI_COLON = 261, + LBRACE = 262, + RBRACE = 263, + LBOX = 264, + RBOX = 265, + COMMA = 266, + IO_OUT = 267, + ASSIGN = 268, + ADD_ASG = 269, + SUB_ASG = 270, + MUL_ASG = 271, + DIV_ASG = 272, + MOD_ASG = 273, + POW_ASG = 274, + QMARK = 275, + COLON = 276, + OR = 277, + AND = 278, + IN = 279, + MATCH = 280, + EQ = 281, + NEQ = 282, + LT = 283, + LTE = 284, + GT = 285, + GTE = 286, + CAT = 287, + GETLINE = 288, + PLUS = 289, + MINUS = 290, + MUL = 291, + DIV = 292, + MOD = 293, + NOT = 294, + UMINUS = 295, + IO_IN = 296, + PIPE = 297, + POW = 298, + INC_or_DEC = 299, + DOLLAR = 300, + FIELD = 301, + LPAREN = 302, + RPAREN = 303, + DOUBLE = 304, + STRING_ = 305, + RE = 306, + ID = 307, + D_ID = 308, + FUNCT_ID = 309, + C_FUNCT_ID = 310, + BUILTIN = 311, + LENGTH = 312, + PRINT = 313, + PRINTF = 314, + SPLIT = 315, + MATCH_FUNC = 316, + SUB = 317, + GSUB = 318, + DO 
= 319, + WHILE = 320, + FOR = 321, + BREAK = 322, + CONTINUE = 323, + IF = 324, + ELSE = 325, + DELETE = 326, + BEGIN = 327, + END = 328, + EXIT = 329, + NEXT = 330, + RETURN = 331, + FUNCTION = 332, + INCLUDE = 333 + }; +#endif +/* Tokens. */ +#define UNEXPECTED 258 +#define BAD_DECIMAL 259 +#define NL 260 +#define SEMI_COLON 261 +#define LBRACE 262 +#define RBRACE 263 +#define LBOX 264 +#define RBOX 265 +#define COMMA 266 +#define IO_OUT 267 +#define ASSIGN 268 +#define ADD_ASG 269 +#define SUB_ASG 270 +#define MUL_ASG 271 +#define DIV_ASG 272 +#define MOD_ASG 273 +#define POW_ASG 274 +#define QMARK 275 +#define COLON 276 +#define OR 277 +#define AND 278 +#define IN 279 +#define MATCH 280 +#define EQ 281 +#define NEQ 282 +#define LT 283 +#define LTE 284 +#define GT 285 +#define GTE 286 +#define CAT 287 +#define GETLINE 288 +#define PLUS 289 +#define MINUS 290 +#define MUL 291 +#define DIV 292 +#define MOD 293 +#define NOT 294 +#define UMINUS 295 +#define IO_IN 296 +#define PIPE 297 +#define POW 298 +#define INC_or_DEC 299 +#define DOLLAR 300 +#define FIELD 301 +#define LPAREN 302 +#define RPAREN 303 +#define DOUBLE 304 +#define STRING_ 305 +#define RE 306 +#define ID 307 +#define D_ID 308 +#define FUNCT_ID 309 +#define C_FUNCT_ID 310 +#define BUILTIN 311 +#define LENGTH 312 +#define PRINT 313 +#define PRINTF 314 +#define SPLIT 315 +#define MATCH_FUNC 316 +#define SUB 317 +#define GSUB 318 +#define DO 319 +#define WHILE 320 +#define FOR 321 +#define BREAK 322 +#define CONTINUE 323 +#define IF 324 +#define ELSE 325 +#define DELETE 326 +#define BEGIN 327 +#define END 328 +#define EXIT 329 +#define NEXT 330 +#define RETURN 331 +#define FUNCTION 332 +#define INCLUDE 333 + +/* Value type. */ +#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED +typedef union YYSTYPE YYSTYPE; +union YYSTYPE +{ +#line 136 "parse.y" /* yacc.c:1909 */ + +mawk_cell_t *cp ; +SYMTAB *stp ; +int start ; /* code starting address as offset from code_base */ +PF_CP fp ; /* ptr to a (print/printf) or (sub/gsub) function */ +const BI_REC *bip ; /* ptr to info about a builtin */ +FBLOCK *fbp ; /* ptr to a function block */ +ARG2_REC *arg2p ; +CA_REC *ca_p ; +int ival ; +PTR ptr ; + +#line 223 "y.tab.h" /* yacc.c:1909 */ +}; +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 +#endif + + + +int Mawk_parse (mawk_state_t *MAWK); + +#endif /* !YY_MAWK_Y_TAB_H_INCLUDED */ diff --git a/src/libmawk/parse.y b/src/libmawk/parse.y new file mode 100644 index 0000000..39bb0f9 --- /dev/null +++ b/src/libmawk/parse.y @@ -0,0 +1,1442 @@ + +/******************************************** +parse.y + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991-94, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +/* $Log: parse.y,v $ + * Revision 1.11 1995/06/11 22:40:09 mike + * change if(dump_code) -> if(dump_code_flag) + * cleanup of parse() + * add cast to shutup solaris cc compiler on char to int comparison + * switch_code_to_main() which cleans up outside_error production + * + * Revision 1.10 1995/04/21 14:20:21 mike + * move_level variable to fix bug in arglist patching of moved code. + * + * Revision 1.9 1995/02/19 22:15:39 mike + * Always set the call_offset field in a CA_REC (for obscure + * reasons in fcall.c (see comments) there.) 
+ * + * Revision 1.8 1994/12/13 00:39:20 mike + * delete A statement to delete all of A at once + * + * Revision 1.7 1994/10/08 19:15:48 mike + * remove SM_DOS + * + * Revision 1.6 1993/12/01 14:25:17 mike + * reentrant array loops + * + * Revision 1.5 1993/07/22 00:04:13 mike + * new op code _LJZ _LJNZ + * + * Revision 1.4 1993/07/15 23:38:15 mike + * SIZE_T and indent + * + * Revision 1.3 1993/07/07 00:07:46 mike + * more work on 1.2 + * + * Revision 1.2 1993/07/03 21:18:01 mike + * bye to yacc_mem + * + * Revision 1.1.1.1 1993/07/03 18:58:17 mike + * move source to cvs + * + * Revision 5.8 1993/05/03 01:07:18 mike + * fix mawk_bozo in LENGTH production + * + * Revision 5.7 1993/01/09 19:03:44 mike + * code_pop checks if the resolve_list needs relocation + * + * Revision 5.6 1993/01/07 02:50:33 mike + * relative vs absolute code + * + * Revision 5.5 1993/01/01 21:30:48 mike + * split mawk_new_STRING() into mawk_new_STRING and mawk_new_STRING0 + * + * Revision 5.4 1992/08/08 17:17:20 brennan + * patch 2: improved timing of error recovery in + * bungled function definitions. Fixes a core dump + * + * Revision 5.3 1992/07/08 15:43:41 brennan + * patch2: length returns. 
I am a wimp + * + * Revision 5.2 1992/01/08 16:11:42 brennan + * code FE_PUSHA carefully for MSDOS large mode + * + * Revision 5.1 91/12/05 07:50:22 brennan + * 1.1 pre-release + * +*/ + +%pure-parser +%parse-param {mawk_state_t *MAWK} +%lex-param {mawk_state_t *MAWK} + +%{ +#include <stdio.h> /* NOTE(review): header name was missing here; <stdio.h> assumed - confirm */ +#include "mawk.h" +#include "types.h" +#include "symtype.h" +#include "code.h" +#include "memory.h" +#include "bi_funct.h" +#include "bi_vars.h" +#include "jmp.h" +#include "field.h" +#include "files.h" +#include "scan.h" +#include "zmalloc.h" +#include "f2d.h" + + +#define YYMAXDEPTH 200 + +void mawk_eat_nl(mawk_state_t * MAWK, YYSTYPE *lvalp); +static void resize_fblock(mawk_state_t *, FBLOCK *); +static void switch_code_to_main(mawk_state_t *); +static void mawk_code_array(mawk_state_t *, SYMTAB *); +static void mawk_code_call_id(mawk_state_t *, CA_REC *, SYMTAB *); +static void field_A2I(mawk_state_t *MAWK); +static void check_var(mawk_state_t *, SYMTAB *); +static void check_array(mawk_state_t *, SYMTAB *); +static void RE_as_arg(mawk_state_t *MAWK); + +void mawk_parser_include(mawk_state_t *MAWK, void *str); + +#define mawk_code_address(x) \ +do { \ + if (is_local(x)) \ + mawk_code2op(MAWK, L_PUSHA, (x)->offset) ;\ + else \ + code2(MAWK, _PUSHA, (x)->stval.cp); \ +} while(0) + +#define CDP(x) (mawk_code_base+(x)) +/* WARNING: These CDP() calculations become invalid after calls + that might change code_base. Which are: code2(), mawk_code2op(), + mawk_code_jmp() and mawk_code_pop(). 
+*/ + +/* this nonsense caters to MSDOS large model */ +#define CODE_FE_PUSHA() mawk_code_ptr->ptr = (PTR) 0 ; code1(FE_PUSHA) + +%} + +%union{ +mawk_cell_t *cp ; +SYMTAB *stp ; +int start ; /* code starting address as offset from code_base */ +PF_CP fp ; /* ptr to a (print/printf) or (sub/gsub) function */ +const BI_REC *bip ; /* ptr to info about a builtin */ +FBLOCK *fbp ; /* ptr to a function block */ +ARG2_REC *arg2p ; +CA_REC *ca_p ; +int ival ; +PTR ptr ; +} +/* NOTE(review): the <member> type tags below were missing and have been restored from the %union and the $n usage in the actions - confirm */ +/* two tokens to help with errors */ +%token UNEXPECTED /* unexpected character */ +%token BAD_DECIMAL + +%token NL +%token SEMI_COLON +%token LBRACE RBRACE +%token LBOX RBOX +%token COMMA +%token <ival> IO_OUT /* > or output pipe */ + +%right ASSIGN ADD_ASG SUB_ASG MUL_ASG DIV_ASG MOD_ASG POW_ASG +%right QMARK COLON +%left OR +%left AND +%left IN +%left <ival> MATCH /* ~ or !~ */ +%left EQ NEQ LT LTE GT GTE +%left CAT +%left GETLINE +%left PLUS MINUS +%left MUL DIV MOD +%left NOT UMINUS +%nonassoc IO_IN PIPE +%right POW +%left <ival> INC_or_DEC +%left DOLLAR FIELD /* last to remove a SR conflict + with getline */ +%right LPAREN RPAREN /* removes some SR conflicts */ + +%token <ptr> DOUBLE STRING_ RE +%token <stp> ID D_ID +%token <fbp> FUNCT_ID +%token <ptr> C_FUNCT_ID +%token <bip> BUILTIN LENGTH +%token <cp> FIELD + +%token PRINT PRINTF SPLIT MATCH_FUNC SUB GSUB +/* keywords */ +%token DO WHILE FOR BREAK CONTINUE IF ELSE IN +%token DELETE BEGIN END EXIT NEXT RETURN FUNCTION INCLUDE + +%type <start> block block_or_separator +%type <start> statement_list statement mark +%type <ival> pr_args +%type <arg2p> arg2 +%type <start> builtin +%type <start> getline_file +%type <start> lvalue lvalue_arrwr bifunct_target_arr field bifunct_target +%type <start> expr cat_expr p_expr +%type <start> while_front if_front +%type <start> for1 for2 +%type <start> array_loop_front +%type <start> return_statement +%type <start> split_front re_arg sub_back +%type <ival> arglist args +%type <fp> print sub_or_gsub +%type <fbp> funct_start funct_head +%type <ca_p> call_args ca_front ca_back +%type <ival> f_arglist f_args + +%% +/* productions */ + +program : program_block + | program program_block + ; + 
+program_block : PA_block /* pattern-action */ + | function_def + | outside_error block + ; + +PA_block : block + { /* this do nothing action removes a vacuous warning + from Bison */ + } + + | include + + | NL /* allow newline anywhere between blocks - normally scan.c eats this up, but include introduces a corner case */ + + | BEGIN + { mawk_be_setup(MAWK, MAWK->scope = SCOPE_BEGIN) ; } + + block + { switch_code_to_main(MAWK) ; } + + | END + { mawk_be_setup(MAWK, MAWK->scope = SCOPE_END) ; } + + block + { switch_code_to_main(MAWK) ; } + + | expr /* this works just like an if statement */ + { mawk_code_jmp(MAWK, _JZ, (INST*)0) ; } + + block_or_separator + { mawk_patch_jmp(MAWK, mawk_code_ptr ) ; } + + /* range pattern, see comment in mawk_execute.c near _RANGE */ + | expr COMMA + { + INST *p1 = CDP($1) ; + int len ; + + mawk_code_push(MAWK, p1, mawk_code_ptr - p1, MAWK->scope, MAWK->active_funct) ; + mawk_code_ptr = p1 ; + + mawk_code2op(MAWK, _RANGE_CHK, 1) ; + mawk_code_ptr += 3 ; + len = mawk_code_pop(MAWK, mawk_code_ptr) ; + mawk_code_ptr += len ; + code1(_RANGE_STOP) ; + p1 = CDP($1) ; + p1[2].op = mawk_code_ptr - (p1+1) ; + } + expr + { code1(_RANGE_STOP) ; } + + block_or_separator + { + INST *p1 = CDP($1) ; + + p1[3].op = CDP($6) - (p1+1) ; + p1[4].op = mawk_code_ptr - (p1+1) ; + } + ; + + + +block : LBRACE statement_list RBRACE + { $$ = $2 ; } + | LBRACE error RBRACE + { $$ = mawk_code_offset ; /* does nothing won't be mawk_executed */ + MAWK->print_flag = MAWK->getline_flag = MAWK->paren_cnt = 0 ; + yyerrok ; } + ; + +block_or_separator : block + | separator /* default print action */ + { $$ = mawk_code_offset ; + code1(_PUSHINT) ; code1(0) ; + code2(MAWK, _PRINT, mawk_f2d(mawk_bi_print)) ; + } + +statement_list : statement + | statement_list statement + ; + + +statement : block + | expr separator + { code1(_POP) ; } + | /* empty */ separator + { $$ = mawk_code_offset ; } + | error separator + { $$ = mawk_code_offset ; + MAWK->print_flag = 
MAWK->getline_flag = 0 ; + MAWK->paren_cnt = 0 ; + yyerrok ; + } + | BREAK separator + { $$ = mawk_code_offset ; mawk_BC_insert(MAWK, 'B', mawk_code_ptr+1) ; + code2(MAWK, _JMP, 0) /* don't use mawk_code_jmp ! */ ; } + | CONTINUE separator + { $$ = mawk_code_offset ; mawk_BC_insert(MAWK, 'C', mawk_code_ptr+1) ; + code2(MAWK, _JMP, 0) ; } + | return_statement + { if ( MAWK->scope != SCOPE_FUNCT ) + mawk_compile_error(MAWK, "return outside function body") ; + } + | NEXT separator + { if ( MAWK->scope != SCOPE_MAIN ) + mawk_compile_error(MAWK, "improper use of next" ) ; + $$ = mawk_code_offset ; + code1(_NEXT) ; + } + ; + +separator : NL | SEMI_COLON + ; + +expr : cat_expr + | lvalue ASSIGN expr { code1(_ASSIGN) ; } + | lvalue ADD_ASG expr { code1(_ADD_ASG) ; } + | lvalue SUB_ASG expr { code1(_SUB_ASG) ; } + | lvalue MUL_ASG expr { code1(_MUL_ASG) ; } + | lvalue DIV_ASG expr { code1(_DIV_ASG) ; } + | lvalue MOD_ASG expr { code1(_MOD_ASG) ; } + | lvalue POW_ASG expr { code1(_POW_ASG) ; } + | lvalue_arrwr ASSIGN expr { code1(_ASSIGN_ARR) ; } + | lvalue_arrwr ADD_ASG expr { code1(_ADD_ASG_ARR) ; } + | lvalue_arrwr SUB_ASG expr { code1(_SUB_ASG_ARR) ; } + | lvalue_arrwr MUL_ASG expr { code1(_MUL_ASG_ARR) ; } + | lvalue_arrwr DIV_ASG expr { code1(_DIV_ASG_ARR) ; } + | lvalue_arrwr MOD_ASG expr { code1(_MOD_ASG_ARR) ; } + | lvalue_arrwr POW_ASG expr { code1(_POW_ASG_ARR) ; } + | expr EQ expr { code1(_EQ) ; } + | expr NEQ expr { code1(_NEQ) ; } + | expr LT expr { code1(_LT) ; } + | expr LTE expr { code1(_LTE) ; } + | expr GT expr { code1(_GT) ; } + | expr GTE expr { code1(_GTE) ; } + + | expr MATCH expr + { + INST *p3 = CDP($3) ; + + if ( p3 == mawk_code_ptr - 2 ) + { + if ( p3->op == _MATCH0 ) p3->op = _MATCH1 ; + + else /* check for string */ + if ( p3->op == _PUSHS ) + { + mawk_cell_t *cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ; + + cp->type = C_STRING ; + cp->ptr = p3[1].ptr ; + mawk_cast_to_RE(MAWK, cp) ; + mawk_code_ptr -= 2 ; + code2(MAWK, _MATCH1, cp->ptr) ; + 
MAWK_ZFREE(MAWK, cp) ; + } + else code1(_MATCH2) ; + } + else code1(_MATCH2) ; + + if ( !$2 ) code1(_NOT) ; + } + +/* short circuit boolean evaluation */ + | expr OR + { code1(_TEST) ; + mawk_code_jmp(MAWK, _LJNZ, (INST*)0) ; + } + expr + { code1(_TEST) ; mawk_patch_jmp(MAWK, mawk_code_ptr) ; } + + | expr AND + { code1(_TEST) ; + mawk_code_jmp(MAWK, _LJZ, (INST*)0) ; + } + expr + { code1(_TEST) ; mawk_patch_jmp(MAWK, mawk_code_ptr) ; } + + | expr QMARK { mawk_code_jmp(MAWK, _JZ, (INST*)0) ; } + expr COLON { mawk_code_jmp(MAWK, _JMP, (INST*)0) ; } + expr + { mawk_patch_jmp(MAWK, mawk_code_ptr) ; mawk_patch_jmp(MAWK, CDP($7)) ; } + ; + +cat_expr : p_expr %prec CAT + | cat_expr p_expr %prec CAT + { code1(_CAT) ; } + ; + +p_expr : DOUBLE + { $$ = mawk_code_offset ; code2(MAWK, _PUSHD, $1) ; } + | STRING_ + { $$ = mawk_code_offset ; code2(MAWK, _PUSHS, $1) ; } + | ID %prec AND /* anything less than IN */ + { check_var(MAWK, $1) ; + $$ = mawk_code_offset ; + if ( is_local($1) ) + { mawk_code2op(MAWK, L_PUSHI, $1->offset) ; } + else code2(MAWK, _PUSHI, $1->stval.cp) ; + } + + | LPAREN expr RPAREN + { $$ = $2 ; } + ; + +p_expr : RE + { $$ = mawk_code_offset ; code2(MAWK, _MATCH0, $1) ; } + ; + +p_expr : p_expr PLUS p_expr { code1(_ADD) ; } + | p_expr MINUS p_expr { code1(_SUB) ; } + | p_expr MUL p_expr { code1(_MUL) ; } + | p_expr DIV p_expr { code1(_DIV) ; } + | p_expr MOD p_expr { code1(_MOD) ; } + | p_expr POW p_expr { code1(_POW) ; } + | NOT p_expr + { $$ = $2 ; code1(_NOT) ; } + | PLUS p_expr %prec UMINUS + { $$ = $2 ; code1(_UPLUS) ; } + | MINUS p_expr %prec UMINUS + { $$ = $2 ; code1(_UMINUS) ; } + | builtin + ; + +p_expr : ID INC_or_DEC + { check_var(MAWK, $1) ; + $$ = mawk_code_offset ; + mawk_code_address($1) ; + + if ( $2 == '+' ) code1(_POST_INC) ; + else code1(_POST_DEC) ; + } + | INC_or_DEC lvalue + { $$ = $2 ; + if ( $1 == '+' ) code1(_PRE_INC) ; + else code1(_PRE_DEC) ; + } + | INC_or_DEC lvalue_arrwr + { $$ = $2 ; + if ( $1 == '+' ) code1(_PRE_INC_ARR) ; + 
else code1(_PRE_DEC_ARR) ; + } + ; + +p_expr : field INC_or_DEC + { if ($2 == '+' ) code1(F_POST_INC ) ; + else code1(F_POST_DEC) ; + } + | INC_or_DEC field + { $$ = $2 ; + if ( $1 == '+' ) code1(F_PRE_INC) ; + else code1( F_PRE_DEC) ; + } + ; + +lvalue : ID + { $$ = mawk_code_offset ; + check_var(MAWK, $1) ; + mawk_code_address($1) ; + } + ; + + +arglist : /* empty */ + { $$ = 0 ; } + | args + ; + +args : expr %prec LPAREN + { $$ = 1 ; } + | args COMMA expr + { $$ = $1 + 1 ; } + ; + +builtin : + BUILTIN mark LPAREN arglist RPAREN + { const BI_REC *p = $1 ; + $$ = $2 ; + if ( (int)p->min_args > $4 || (int)p->max_args < $4 ) + mawk_compile_error( + MAWK, "wrong number of arguments in call to %s" , + p->name ) ; + if ( p->min_args != p->max_args ) /* variable args */ + { code1(_PUSHINT) ; code1($4) ; } + code2(MAWK, _BUILTIN , mawk_f2d(p->fp)) ; + } + | LENGTH /* this is an irritation */ + { + $$ = mawk_code_offset ; + code1(_PUSHINT) ; code1(0) ; + code2(MAWK, _BUILTIN, mawk_f2d($1->fp)) ; + } + ; + +/* an empty production to store the mawk_code_ptr */ +mark : /* empty */ + { $$ = mawk_code_offset ; } + +/* print_statement */ +statement : print mark pr_args pr_direction separator + { code2(MAWK, _PRINT, mawk_f2d($1)) ; + if ( $1 == mawk_bi_printf && $3 == 0 ) + mawk_compile_error(MAWK, "no arguments in call to printf") ; + MAWK->print_flag = 0 ; + $$ = $2 ; + } + ; + +print : PRINT { $$ = mawk_bi_print ; MAWK->print_flag = 1 ;} + | PRINTF { $$ = mawk_bi_printf ; MAWK->print_flag = 1 ; } + ; + +pr_args : arglist { mawk_code2op(MAWK, _PUSHINT, $1) ; } + | LPAREN arg2 RPAREN + { $$ = $2->cnt ; mawk_zfree(MAWK, $2,sizeof(ARG2_REC)) ; + mawk_code2op(MAWK, _PUSHINT, $$) ; + } + | LPAREN RPAREN + { $$=0 ; mawk_code2op(MAWK, _PUSHINT, 0) ; } + ; + +arg2 : expr COMMA expr + { $$ = (ARG2_REC*) mawk_zmalloc(MAWK, sizeof(ARG2_REC)) ; + $$->start = $1 ; + $$->cnt = 2 ; + } + | arg2 COMMA expr + { $$ = $1 ; $$->cnt++ ; } + ; + +pr_direction : /* empty */ + | IO_OUT expr + { 
mawk_code2op(MAWK, _PUSHINT, $1) ; } + ; + + +/* IF and IF-ELSE */ + +if_front : IF LPAREN expr RPAREN + { $$ = $3 ; mawk_eat_nl(MAWK, &yylval) ; mawk_code_jmp(MAWK, _JZ, (INST*)0) ; } + ; + +/* if_statement */ +statement : if_front statement + { mawk_patch_jmp(MAWK, mawk_code_ptr ) ; } + ; + +else : ELSE { mawk_eat_nl(MAWK, &yylval) ; mawk_code_jmp(MAWK, _JMP, (INST*)0) ; } + ; + +/* if_else_statement */ +statement : if_front statement else statement + { mawk_patch_jmp(MAWK, mawk_code_ptr) ; + mawk_patch_jmp(MAWK, CDP($4)) ; + } + + +/* LOOPS */ + +do : DO + { mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ; } + ; + +/* do_statement */ +statement : do statement WHILE LPAREN expr RPAREN separator + { $$ = $2 ; + mawk_code_jmp(MAWK, _JNZ, CDP($2)) ; + mawk_BC_clear(MAWK, mawk_code_ptr, CDP($5)) ; } + ; + +while_front : WHILE LPAREN expr RPAREN + { mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ; + $$ = $3 ; + + /* check if const expression */ + if ( mawk_code_ptr - 2 == CDP($3) && + mawk_code_ptr[-2].op == _PUSHD && + *(double*)mawk_code_ptr[-1].ptr != 0.0 + ) + mawk_code_ptr -= 2 ; + else + { INST *p3 = CDP($3) ; + mawk_code_push(MAWK, p3, mawk_code_ptr-p3, MAWK->scope, MAWK->active_funct) ; + mawk_code_ptr = p3 ; + code2(MAWK, _JMP, (INST*)0) ; /* code2() not mawk_code_jmp() */ + } + } + ; + +/* while_statement */ +statement : while_front statement + { + int saved_offset ; + int len ; + INST *p1 = CDP($1) ; + INST *p2 = CDP($2) ; + + if ( p1 != p2 ) /* real mawk_test in loop */ + { + p1[1].op = mawk_code_ptr-(p1+1) ; + saved_offset = mawk_code_offset ; + len = mawk_code_pop(MAWK, mawk_code_ptr) ; + mawk_code_ptr += len ; + mawk_code_jmp(MAWK, _JNZ, CDP($2)) ; + mawk_BC_clear(MAWK, mawk_code_ptr, CDP(saved_offset)) ; + } + else /* while(1) */ + { + mawk_code_jmp(MAWK, _JMP, p1) ; + mawk_BC_clear(MAWK, mawk_code_ptr, CDP($2)) ; + } + } + ; + + +/* for_statement */ +statement : for1 for2 for3 statement + { + int cont_offset = mawk_code_offset ; + unsigned len = 
mawk_code_pop(MAWK, mawk_code_ptr) ; + INST *p2 = CDP($2) ; + INST *p4 = CDP($4) ; + + mawk_code_ptr += len ; + + if ( p2 != p4 ) /* real mawk_test in for2 */ + { + p4[-1].op = mawk_code_ptr - p4 + 1 ; + len = mawk_code_pop(MAWK, mawk_code_ptr) ; + mawk_code_ptr += len ; + mawk_code_jmp(MAWK, _JNZ, CDP($4)) ; + } + else /* for(;;) */ + mawk_code_jmp(MAWK, _JMP, p4) ; + + mawk_BC_clear(MAWK, mawk_code_ptr, CDP(cont_offset)) ; + + } + ; + +for1 : FOR LPAREN SEMI_COLON { $$ = mawk_code_offset ; } + | FOR LPAREN expr SEMI_COLON + { $$ = $3 ; code1(_POP) ; } + ; + +for2 : SEMI_COLON { $$ = mawk_code_offset ; } + | expr SEMI_COLON + { + if ( mawk_code_ptr - 2 == CDP($1) && + mawk_code_ptr[-2].op == _PUSHD && + * (double*) mawk_code_ptr[-1].ptr != 0.0 + ) + mawk_code_ptr -= 2 ; + else + { + INST *p1 = CDP($1) ; + mawk_code_push(MAWK, p1, mawk_code_ptr-p1, MAWK->scope, MAWK->active_funct) ; + mawk_code_ptr = p1 ; + code2(MAWK, _JMP, (INST*)0) ; + } + } + ; + +for3 : RPAREN + { mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ; + mawk_code_push(MAWK, (INST*)0,0, MAWK->scope, MAWK->active_funct) ; + } + | expr RPAREN + { INST *p1 = CDP($1) ; + + mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ; + code1(_POP) ; + mawk_code_push(MAWK, p1, mawk_code_ptr - p1, MAWK->scope, MAWK->active_funct) ; + mawk_code_ptr -= mawk_code_ptr - p1 ; + } + ; + + +/* arrays */ + +expr : expr IN ID + { check_array(MAWK, $3) ; + mawk_code_array(MAWK, $3) ; + code1(A_TEST) ; + } + | LPAREN arg2 RPAREN IN ID + { $$ = $2->start ; + mawk_code2op(MAWK, A_CAT, $2->cnt) ; + mawk_zfree(MAWK, $2, sizeof(ARG2_REC)) ; + + check_array(MAWK, $5) ; + mawk_code_array(MAWK, $5) ; + code1(A_TEST) ; + } + ; + +/* array reference for a variable that is in the target (writable) part of + a bi_funct call such as gsub, sub or getline */ +bifunct_target_arr : ID mark LBOX args RBOX + { + if ( $4 > 1 ) + { mawk_code2op(MAWK, A_CAT, $4) ; } + + check_array(MAWK, $1) ; + if( is_local($1) ) + { mawk_code2op(MAWK, LAE_PUSHA, 
/* for ( i in A ) statement */

/* Front half of an array loop.  The value of the rule is the code offset
   at which the loop set-up code begins. */
array_loop_front :  FOR LPAREN ID IN ID RPAREN
                    { mawk_eat_nl(MAWK, &yylval) ; mawk_BC_new(MAWK) ;
                      $$ = mawk_code_offset ;

                      /* $3 must be usable as a variable; emit code for
                         its address */
                      check_var(MAWK, $3) ;
                      mawk_code_address($3) ;
                      /* $5 must be usable as an array; emit code that
                         pushes it */
                      check_array(MAWK, $5) ;
                      mawk_code_array(MAWK, $5) ;

                      /* operand is null here; the array_loop rule stores
                         a distance into this cell once the body has been
                         generated */
                      code2(MAWK, SET_ALOOP, (INST*)0) ;
                    }
                 ;
/* Tail of a split() call: either the closing parenthesis (no separator
   argument) or ", expr )" giving the separator. */
split_back :  RPAREN
                /* no separator given: push MAWK->fs_shadow instead */
                { code2(MAWK, _PUSHI, &MAWK->fs_shadow) ; }
           |  COMMA expr  RPAREN
                {
                  /* only rewrite when the separator expression compiled
                     to exactly two code cells */
                  if ( CDP($2) == mawk_code_ptr - 2 )
                  {
                    if ( mawk_code_ptr[-2].op == _MATCH0 )
                        /* a bare regular expression: push it as a value
                           instead of matching it */
                        RE_as_arg(MAWK) ;
                    else
                    if ( mawk_code_ptr[-2].op == _PUSHS )
                    { /* a string constant: wrap it in a cell, convert it
                         once at compile time with mawk_cast_for_split()
                         and push the converted cell with _PUSHC */
                      mawk_cell_t *cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ;

                      cp->type = C_STRING ;
                      cp->ptr = mawk_code_ptr[-1].ptr ;
                      mawk_cast_for_split(MAWK, cp) ;
                      mawk_code_ptr[-2].op = _PUSHC ;
                      mawk_code_ptr[-1].ptr = (PTR) cp ;
                    }
                  }
                }
           ;
/* An expression used where a regular expression argument is expected
   (match(), sub(), gsub()).  Constant arguments are converted at compile
   time; anything else is left as generated. */
re_arg   :   expr
             {
               INST *p1 = CDP($1) ;

               /* only rewrite when expr compiled to exactly two code cells */
               if ( p1 == mawk_code_ptr - 2 )
               {
                 /* a bare regular expression: push it as a value */
                 if ( p1->op == _MATCH0 )  RE_as_arg(MAWK) ;
                 else
                 if ( p1->op == _PUSHS )
                 { /* a string constant: wrap it in a cell, convert it with
                      mawk_cast_to_RE() now and push the cell with _PUSHC */
                   mawk_cell_t *cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ;

                   cp->type = C_STRING ;
                   cp->ptr = p1[1].ptr ;
                   mawk_cast_to_RE(MAWK, cp) ;
                   p1->op = _PUSHC ;
                   p1[1].ptr = (PTR) cp ;
                 }
               }
             }
/* Header of a user function definition, up to and including the closing
   parenthesis of the parameter list.  Switches code generation from the
   current block to a fresh block for the function body. */
funct_start   :  funct_head  LPAREN  f_arglist  RPAREN
                 { mawk_eat_nl(MAWK, &yylval) ;
                   MAWK->scope = SCOPE_FUNCT ;
                   MAWK->active_funct = $1 ;
                   /* store the active code block through main_code_p
                      before the code pointers are redirected below */
                   *MAWK->main_code_p = MAWK->active_code ;

                   /* one type byte per parameter, all starting out as
                      ST_LOCAL_NONE; no vector at all for zero parameters */
                   $1->nargs = $3 ;
                   if ( $3 )
                        $1->typev = (char *)
                        memset( mawk_zmalloc(MAWK, $3), ST_LOCAL_NONE, $3) ;
                   else $1->typev = (char *) 0 ;

                   /* start a new code area of PAGESZ instructions; the
                      warn mark sits CODEWARN instructions before the limit */
                   mawk_code_ptr = mawk_code_base =
                       (INST *) mawk_zmalloc(MAWK, INST_BYTES(PAGESZ));
                   mawk_code_limit = mawk_code_base + PAGESZ ;
                   mawk_code_warn = mawk_code_limit - CODEWARN ;
                 }
              ;
/* Non-empty parameter list of a function definition.  The value of the
   rule is the number of parameters seen so far; each parameter gets its
   zero-based position as offset. */
f_args     :  ID
              { /* mawk_save_id() returns the symbol to use for the
                   parameter -- presumably a fresh local entry shadowing
                   any global of the same name (TODO confirm) */
                $1 = mawk_save_id(MAWK, $1->name) ;
                $1->type = ST_LOCAL_NONE ;
                $1->offset = 0 ;
                $$ = 1 ;
              }
           |  f_args  COMMA  ID
              { if ( is_local($3) )
                  /* $$ is not assigned on this path */
                  mawk_compile_error(MAWK, "%s is duplicated in argument list",
                    $3->name) ;
                else
                { $3 = mawk_save_id(MAWK, $3->name) ;
                  $3->type = ST_LOCAL_NONE ;
                  /* the count so far is the position of the new parameter */
                  $3->offset = $1 ;
                  $$ = $1 + 1 ;
                }
              }
           ;
$1->arg_num+1 : 0 ; + $$->call_offset = mawk_code_offset ; + } + | ca_front ID COMMA + { $$ = MAWK_ZMALLOC(MAWK, CA_REC) ; + $$->link = $1 ; + $$->arg_num = $1 ? $1->arg_num+1 : 0 ; + + mawk_code_call_id(MAWK, $$, $2) ; + } + ; + +ca_back : expr RPAREN + { $$ = MAWK_ZMALLOC(MAWK, CA_REC) ; + $$->type = CA_EXPR ; + $$->call_offset = mawk_code_offset ; + } + + | ID RPAREN + { $$ = MAWK_ZMALLOC(MAWK, CA_REC) ; + mawk_code_call_id(MAWK, $$, $1) ; + } + ; + + +include: + INCLUDE STRING_ { mawk_parser_include(MAWK, $2); } + ; + +multi_nl: + NL + | multi_nl NL + ; + +%% + +/* resize the code for a user function */ + +static void resize_fblock(mawk_state_t *MAWK, FBLOCK *fbp) +{ + CODEBLOCK *p = MAWK_ZMALLOC(MAWK, CODEBLOCK) ; + + mawk_code2op(MAWK, _RET0, _HALT) ; + /* make sure there is always a return */ + + *p = MAWK->active_code ; + fbp->code = mawk_code_shrink(MAWK, p, &fbp->size) ; + /* mawk_code_shrink() zfrees p */ + +/* this list is alos used to free functions in pedantic mode */ +#ifndef MAWK_MEM_PEDANTIC + if ( MAWK->dump_code_flag ) +#endif + mawk_add_to_fdump_list(MAWK, fbp) ; +/* printf("CODE add: %p/%d\n", fbp->code, fbp->size);*/ +} + + +/* convert FE_PUSHA to FE_PUSHI + or F_PUSH to F_PUSHI +*/ +static void field_A2I(mawk_state_t *MAWK) +{ + mawk_cell_t *cp; + + if ( mawk_code_ptr[-1].op == FE_PUSHA && + mawk_code_ptr[-1].ptr == (PTR) 0) + /* On most architectures, the two mawk_tests are the same; a good + compiler might eliminate one. 
On LM_DOS, and possibly other + segmented architectures, they are not */ + { mawk_code_ptr[-1].op = FE_PUSHI ; } + else + { + cp = (mawk_cell_t *) mawk_code_ptr[-1].ptr ; + + if (cp == MAWK->field || (cp > MAWK_NF && cp <= LAST_PFIELD)) + { + mawk_code_ptr[-2].op = _PUSHI ; + } + else if ( cp == MAWK_NF ) + { mawk_code_ptr[-2].op = NF_PUSHI ; mawk_code_ptr-- ; } + + else + { + mawk_code_ptr[-2].op = F_PUSHI ; + mawk_code_ptr -> op = mawk_field_addr_to_index(MAWK, mawk_code_ptr[-1].ptr ) ; + mawk_code_ptr++ ; + } + } +} + +/* we've seen an ID in a context where it should be a VAR, + check that's consistent with previous usage */ +static void check_var(mawk_state_t *MAWK, register SYMTAB *p) +{ + switch(p->type) + { + case ST_NONE : /* new id */ + p->type = ST_VAR ; + p->stval.cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ; + p->stval.cp->type = C_NOINIT ; + break ; + + case ST_LOCAL_NONE : + p->type = ST_LOCAL_VAR ; + MAWK->active_funct->typev[p->offset] = ST_LOCAL_VAR ; + break ; + + case ST_VAR : + case ST_LOCAL_VAR : break ; + + default : + mawk_type_error(MAWK, p) ; + break ; + } +} + +/* we've seen an ID in a context where it should be an ARRAY, + check that's consistent with previous usage */ +static void check_array(mawk_state_t *MAWK, register SYMTAB *p) +{ + switch(p->type) + { + case ST_NONE : /* a new array */ + p->type = ST_ARRAY ; + p->stval.array = mawk_array_new(MAWK, NULL) ; + break ; + + case ST_ARRAY : + case ST_LOCAL_ARRAY : + break ; + + case ST_LOCAL_NONE : + p->type = ST_LOCAL_ARRAY ; + MAWK->active_funct->typev[p->offset] = ST_LOCAL_ARRAY ; + break ; + + default : mawk_type_error(MAWK, p) ; break ; + } +} + +static void mawk_code_array(mawk_state_t *MAWK, register SYMTAB *p) +{ + if ( is_local(p) ) mawk_code2op(MAWK, LA_PUSHA, p->offset) ; + else code2(MAWK, A_PUSHA, p->stval.array) ; +} + + +/* we've seen an ID as an argument to a user defined function */ +static void mawk_code_call_id(mawk_state_t *MAWK, register CA_REC *p, register SYMTAB *ip) +{ + 
p->call_offset = mawk_code_offset ; + /* This always get set now. So that fcall:relocate_arglist + works. */ + + switch( ip->type ) + { + case ST_VAR : + p->type = CA_EXPR ; + code2(MAWK, _PUSHI, ip->stval.cp) ; + break ; + + case ST_LOCAL_VAR : + p->type = CA_EXPR ; + mawk_code2op(MAWK, L_PUSHI, ip->offset) ; + break ; + + case ST_ARRAY : + p->type = CA_ARRAY ; + code2(MAWK, A_PUSHA, ip->stval.array) ; + break ; + + case ST_LOCAL_ARRAY : + p->type = CA_ARRAY ; + mawk_code2op(MAWK, LA_PUSHA, ip->offset) ; + break ; + + /* not enough info to code it now; it will have to + be patched later */ + + case ST_NONE : + p->type = ST_NONE ; + p->sym_p = ip ; + code2(MAWK, _PUSHI, &MAWK->code_call_id_dummy) ; + break ; + + case ST_LOCAL_NONE : + p->type = ST_LOCAL_NONE ; + p->type_p = & MAWK->active_funct->typev[ip->offset] ; + mawk_code2op(MAWK, L_PUSHI, ip->offset) ; + break ; + +#ifdef DEBUG + default : + mawk_bozo(MAWK, "mawk_code_call_id") ; +#endif + + } +} + +/* an RE by itself was coded as _MATCH0 , change to + push as an expression */ + +static void RE_as_arg(mawk_state_t *MAWK) +{ + mawk_cell_t *cp = MAWK_ZMALLOC(MAWK, mawk_cell_t) ; + + mawk_code_ptr -= 2 ; + cp->type = C_RE ; + cp->ptr = mawk_code_ptr[1].ptr ; + code2(MAWK, _PUSHC, cp) ; +} + +/* reset the active_code back to the MAIN block */ +static void switch_code_to_main(mawk_state_t *MAWK) +{ + switch(MAWK->scope) + { + case SCOPE_BEGIN : + *MAWK->begin_code_p = MAWK->active_code ; + MAWK->active_code = *MAWK->main_code_p ; + break ; + + case SCOPE_END : + *MAWK->end_code_p = MAWK->active_code ; + MAWK->active_code = *MAWK->main_code_p ; + break ; + + case SCOPE_FUNCT : + MAWK->active_code = *MAWK->main_code_p ; + break ; + + case SCOPE_MAIN : + break ; + } + MAWK->active_funct = (FBLOCK*) 0 ; + MAWK->scope = SCOPE_MAIN ; +} + + +void mawk_parse(mawk_state_t *MAWK) +{ + if (!MAWK->binary_loaded) { + if ( yyparse(MAWK) || MAWK->compile_error_count != 0 ) mawk_exit(MAWK, 2) ; + + mawk_scan_cleanup(MAWK) ; + 
mawk_set_code(MAWK) ; + /* code must be set before call to mawk_resolve_fcalls() */ + if ( MAWK->resolve_list ) mawk_resolve_fcalls(MAWK) ; + } + + if ( MAWK->compile_error_count != 0 ) mawk_exit(MAWK, 2) ; + if ( MAWK->dump_code_flag ) { mawk_dump_code(MAWK);} + if ( MAWK->dump_sym_flag ) { mawk_dump_sym_text(MAWK); } + if ((MAWK->dump_code_flag ) || ( MAWK->dump_sym_flag )) { mawk_exit(MAWK, 0); } + + (void)mawk_d2f(NULL); /* suppress compiler warning */ +} + + +void mawk_parser_include(mawk_state_t *MAWK, void *str) +{ + mawk_parser_push(MAWK); + + MAWK->ps.eof_flag = 0 ; + MAWK->ps.pfile_name = ((mawk_string_t *)str)->str; + MAWK->ps.buffp = MAWK->ps.buffer = (unsigned char *) mawk_zmalloc(MAWK, BUFFSZ + 1) ; + *MAWK->ps.buffp = '\0'; + if (mawk_scan_open(MAWK) == 1) + MAWK->token_lineno = MAWK->lineno = 1 ; + else + mawk_parser_pop(MAWK); +} diff --git a/src/libmawk/print.c b/src/libmawk/print.c new file mode 100644 index 0000000..765bc3e --- /dev/null +++ b/src/libmawk/print.c @@ -0,0 +1,567 @@ + +/******************************************** +print.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991-1993. Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +#include "mawk.h" +#include +#include +#include "bi_vars.h" +#include "bi_funct.h" +#include "memory.h" +#include "field.h" +#include "scan.h" +#include "files.h" +#include "vio.h" +#include "cell.h" + +static mawk_string_t *do_printf(mawk_state_t *, FILE_NODE *, char *, unsigned, mawk_cell_t *); +static void bad_conversion(mawk_state_t *, int, char *, char *); +static void write_error(mawk_state_t * MAWK); + +/* prototyping fprintf() or sprintf() is a loser as ellipses will + always cause problems with ansi compilers depending on what + they've already seen, + but we need them here and sometimes they are missing +*/ + +#ifdef NO_FPRINTF_IN_STDIO +int fprintf(FILE *, const char *, ...); +#endif +#ifdef NO_SPRINTF_IN_STDIO +int sprintf(char *, const char *, ...); +#endif + +/* Once mawk_execute() starts the sprintf code is (belatedly) the only + code allowed to use string_buff */ + +void mawk_print_cell(mawk_state_t *MAWK, register mawk_cell_t *p, register FILE_NODE *fnode) +{ + int len; + + switch (p->type) { + case C_NOINIT: + break; + case C_MBSTRN: + case C_STRING: + case C_STRNUM: + switch (len = string(p)->len) { + case 0: + break; + case 1: + mawk_vio_putc(MAWK, fnode->vf, string(p)->str[0]); + break; + + default: + mawk_vio_write(MAWK, fnode->vf, string(p)->str, len); + } + break; + + case C_NUM: + { + Int ival = mawk_d_to_I(p->d.dval); + const char *txt; + + txt = mawk_num_print_spec(ival); + if (txt != NULL) + mawk_vio_write_str(MAWK, fnode->vf, txt); + else if ((mawk_num_t) ival == p->d.dval) { /* integers print as "%[l]d" */ + char buff[64]; + int len; + len = sprintf(buff, INT_FMT, ival); + mawk_vio_write(MAWK, fnode->vf, buff, len); + } + else { + fnode->vf->imp->vprintf(MAWK, fnode->vf, string(MAWK_OFMT)->str, p->d.dval); + } + } + break; + + default: + mawk_bozo(MAWK, "bad cell passed to print_cell"); + } +} + +/* on entry to bi_print or bi_printf the stack is: + + sp[0] = an integer k + if ( k < 0 ) 
output is to a file with name in sp[-1] + { so open file and sp -= 2 } + + sp[0] = k >= 0 is the number of print args + sp[-k] holds the first argument +*/ + +mawk_cell_t *mawk_bi_print(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + /* sp is stack ptr passed in */ + register mawk_cell_t *p; + register int k; + FILE_NODE *fnode; + + k = sp->type; + if (k < 0) { + /* k holds redirection */ + if ((--sp)->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + fnode = mawk_file_find(MAWK, string(sp), k, 1); + free_STRING(string(sp)); + k = (--sp)->type; + /* k now has number of arguments */ + } + else + fnode = MAWK->fnode_stdout; + + if (k) { + p = sp - k; /* clear k variables off the stack */ + sp = p - 1; + k--; + + while (k > 0) { + if (fnode != NULL) { + mawk_print_cell(MAWK, p, fnode); + mawk_print_cell(MAWK, OFS, fnode); + } + mawk_cell_destroy(MAWK, p); + p++; + k--; + } + + if (fnode != NULL) + mawk_print_cell(MAWK, p, fnode); + mawk_cell_destroy(MAWK, p); + } + else { /* print $0 */ + sp--; + if (fnode != NULL) + mawk_print_cell(MAWK, &MAWK->field[0], fnode); + } + + if (fnode != NULL) { + mawk_print_cell(MAWK, ORS, fnode); + if (mawk_vio_error(MAWK, fnode->vf)) + write_error(MAWK); + } + return sp; +} + +/*---------- types and defs for doing printf and sprintf----*/ +#define PF_C 0 /* %c */ +#define PF_S 1 /* %s */ +#define PF_D 2 /* int conversion */ +#define PF_F 3 /* float conversion */ + +/* for switch on number of '*' and type */ +#define AST(num,type) ((PF_F+1)*(num)+(type)) + +typedef int (*PRINTER) (PTR, const char *, ...); +/*-------------------------------------------------------*/ + +static void bad_conversion(mawk_state_t *MAWK, int cnt, char *who, char *format) +{ + mawk_rt_error(MAWK, "improper conversion(number %d) in %s(\"%s\")", cnt, who, format); +} + + +int sprintf_wrapper(mawk_state_t *MAWK, char *ostr, const char *fmt, ...) 
+{ + int ret; + va_list ap; + va_start(ap, fmt); + ret = vsprintf(ostr, fmt, ap); + va_end(ap); + return ret; +} + + +/* the contents of format are preserved, + caller does mawk_cell_t cleanup + + This routine does both printf and sprintf (if fp==0) +*/ +static mawk_string_t *do_printf(mawk_state_t *MAWK, FILE_NODE *fnode, char *format, unsigned argcnt, mawk_cell_t *cp) +{ + /* argcnt number of args on eval stack */ + /* mawk_cell_t *cp ptr to an array of arguments (on the eval stack) */ + char save; + char *p; + register char *q = format; + register char *target; + int l_flag, h_flag; /* seen %ld or %hd */ + int ast_cnt; + int ast[2]; + Int Ival; + int num_conversion = 0; /* for error messages */ + char *who; /*ditto */ + int pf_type; /* conversion type */ + PRINTER printer; /* pts at fprintf() or sprintf() */ + +#ifdef SHORT_INTS + char xbuff[256]; /* splice in l qualifier here */ +#endif + + if (fnode == NULL) { /* doing sprintf */ + target = MAWK->sprintf_buff; + printer = (PRINTER) sprintf_wrapper; + who = "sprintf"; + } + else { /* doing printf */ + target = (char *) fnode->vf; /* will never change */ + printer = (PRINTER) fnode->vf->imp->vprintf; + who = "printf"; + } + + while (1) { + if (fnode) { /* printf */ + while (*q != '%') { + if (*q == 0) { + if (mawk_vio_error(MAWK, fnode->vf)) + write_error(MAWK); + /* return is ignored */ + return (mawk_string_t *) 0; + } + else { + mawk_vio_putc(MAWK, fnode->vf, *q); + q++; + } + } + } + else { /* sprintf */ + + while (*q != '%') + if (*q == 0) { + if (target > MAWK->sprintf_limit) { /* mawk_damaged */ + /* hope this works */ + mawk_rt_overflow(MAWK, "sprintf buffer", MAWK->sprintf_limit - MAWK->sprintf_buff); + } + else { /* really done */ + + mawk_string_t *retval; + int len = target - MAWK->sprintf_buff; + + retval = mawk_new_STRING0(MAWK, len); + memcpy(retval->str, MAWK->sprintf_buff, len); + return retval; + } + } + else + *target++ = *q++; + } + + + /* *q == '%' */ + num_conversion++; + + if (*++q == '%') 
{ /* %% */ + if (fnode) + mawk_vio_putc(MAWK, fnode->vf, *q); + else + *target++ = *q; + + q++; + continue; + } + + /* mark the '%' with p */ + p = q - 1; + + /* eat the flags */ + while (*q == '-' || *q == '+' || *q == ' ' || *q == '#' || *q == '0') + q++; + + ast_cnt = 0; + if (*q == '*') { + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + ast[ast_cnt++] = d_to_i(cp++->d.dval); + argcnt--; + q++; + } + else + while (MAWK->scan_code[*(unsigned char *) q] == SC_DIGIT) + q++; + /* width is done */ + + if (*q == '.') { /* have precision */ + q++; + if (*q == '*') { + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + ast[ast_cnt++] = d_to_i(cp++->d.dval); + argcnt--; + q++; + } + else + while (MAWK->scan_code[*(unsigned char *) q] == SC_DIGIT) + q++; + } + + if (argcnt <= 0) + mawk_rt_error(MAWK, "not enough arguments passed to %s(\"%s\")", who, format); + + l_flag = h_flag = 0; + + if (*q == 'l') { + q++; + l_flag = 1; + } + else if (*q == 'h') { + q++; + h_flag = 1; + } + switch (*q++) { + case 's': + if (l_flag + h_flag) + bad_conversion(MAWK, num_conversion, who, format); + if (cp->type < C_STRING) + mawk_cast1_to_str(MAWK, cp); + pf_type = PF_S; + break; + + case 'c': + if (l_flag + h_flag) + bad_conversion(MAWK, num_conversion, who, format); + + switch (cp->type) { + case C_NOINIT: + Ival = 0; + break; + + case C_STRNUM: + case C_NUM: + Ival = mawk_d_to_I(cp->d.dval); + break; + + case C_STRING: + Ival = string(cp)->str[0]; + break; + + case C_MBSTRN: + mawk_check_strnum(MAWK, cp); + Ival = cp->type == C_STRING ? 
string(cp)->str[0] : mawk_d_to_I(cp->d.dval); + break; + + default: + mawk_bozo(MAWK, "printf %c"); + } + + pf_type = PF_C; + break; + + case 'd': + case 'o': + case 'x': + case 'X': + case 'i': + case 'u': + { + const char *txt; + + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); +#ifdef MAWK_PRINTF_INFNAN + txt = mawk_num_print_spec(cp->d.dval); + if (txt == NULL) { +#endif + Ival = mawk_d_to_I(cp->d.dval); + pf_type = PF_D; +#ifdef MAWK_PRINTF_INFNAN + } + else { + cp->ptr = (PTR) mawk_new_STRING(MAWK, txt); + cp->type = C_STRING; + cp->d.dval = 42; + p = "%s"; + pf_type = PF_S; + } +#endif + } + break; + +#ifndef MAWK_NO_FLOAT + case 'e': + case 'g': + case 'f': + case 'E': + case 'G': + if (h_flag + l_flag) + bad_conversion(MAWK, num_conversion, who, format); + if (cp->type != C_NUM) + mawk_cast1_to_num(MAWK, cp); + pf_type = PF_F; + break; +#endif + + default: + bad_conversion(MAWK, num_conversion, who, format); + } + + save = *q; + *q = 0; + +#ifdef SHORT_INTS + if (pf_type == PF_D) { + /* need to splice in long modifier */ + strcpy(xbuff, p); + + if (l_flag) /* do nothing */ + ; + else { + int k = q - p; + + if (h_flag) { + Ival = (short) Ival; + /* replace the 'h' with 'l' (really!) 
*/ + xbuff[k - 2] = 'l'; + if (xbuff[k - 1] != 'd' && xbuff[k - 1] != 'i') + Ival &= 0xffff; + } + else { + /* the usual case */ + xbuff[k] = xbuff[k - 1]; + xbuff[k - 1] = 'l'; + xbuff[k + 1] = 0; + } + } + } +#endif + + /* ready to call printf() */ + switch (AST(ast_cnt, pf_type)) { + case AST(0, PF_C): + (*printer) (MAWK, (PTR) target, p, (int) Ival); + break; + + case AST(1, PF_C): + (*printer) (MAWK, (PTR) target, p, ast[0], (int) Ival); + break; + + case AST(2, PF_C): + (*printer) (MAWK, (PTR) target, p, ast[0], ast[1], (int) Ival); + break; + + case AST(0, PF_S): + (*printer) (MAWK, (PTR) target, p, string(cp)->str); + break; + + case AST(1, PF_S): + (*printer) (MAWK, (PTR) target, p, ast[0], string(cp)->str); + break; + + case AST(2, PF_S): + (*printer) (MAWK, (PTR) target, p, ast[0], ast[1], string(cp)->str); + break; + +#ifdef SHORT_INTS +#define FMT xbuff /* format in xbuff */ +#else +#define FMT p /* p -> format */ +#endif + case AST(0, PF_D): + (*printer) (MAWK, (PTR) target, FMT, Ival); + break; + + case AST(1, PF_D): + (*printer) (MAWK, (PTR) target, FMT, ast[0], Ival); + break; + + case AST(2, PF_D): + (*printer) (MAWK, (PTR) target, FMT, ast[0], ast[1], Ival); + break; + +#undef FMT + + + case AST(0, PF_F): + (*printer) (MAWK, (PTR) target, p, cp->d.dval); + break; + + case AST(1, PF_F): + (*printer) (MAWK, (PTR) target, p, ast[0], cp->d.dval); + break; + + case AST(2, PF_F): + (*printer) (MAWK, (PTR) target, p, ast[0], ast[1], cp->d.dval); + break; + } + if (fnode == NULL) + while (*target) + target++; + *q = save; + argcnt--; + cp++; + } +} + +mawk_cell_t *mawk_bi_printf(mawk_state_t *MAWK, register mawk_cell_t *sp) +{ + register int k; + register mawk_cell_t *p; + FILE_NODE *fnode; + + k = sp->type; + if (k < 0) { + /* k has redirection */ + if ((--sp)->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + fnode = mawk_file_find(MAWK, string(sp), k, 1); + free_STRING(string(sp)); + k = (--sp)->type; + /* k is now number of args including format */ + 
} + else + fnode = MAWK->fnode_stdout; + + sp -= k; /* sp points at the format string */ + k--; + + if (sp->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + do_printf(MAWK, fnode, string(sp)->str, k, sp + 1); + free_STRING(string(sp)); + + /* cleanup arguments on eval stack */ + for (p = sp + 1; k; k--, p++) + mawk_cell_destroy(MAWK, p); + return --sp; +} + +mawk_cell_t *mawk_bi_sprintf(mawk_state_t *MAWK, mawk_cell_t *sp) +{ + mawk_cell_t *p; + int argcnt = sp->type; + mawk_string_t *sval; + + sp -= argcnt; /* sp points at the format string */ + argcnt--; + + if (sp->type != C_STRING) + mawk_cast1_to_str(MAWK, sp); + sval = do_printf(MAWK, NULL, string(sp)->str, argcnt, sp + 1); + free_STRING(string(sp)); + sp->ptr = (PTR) sval; + + /* cleanup */ + for (p = sp + 1; argcnt; argcnt--, p++) + mawk_cell_destroy(MAWK, p); + + return sp; +} + + +static void write_error(mawk_state_t * MAWK) +{ + mawk_errmsg(MAWK, errno, "write failure"); + mawk_exit(MAWK, 2); +} diff --git a/src/libmawk/print_dummy.c b/src/libmawk/print_dummy.c new file mode 100644 index 0000000..a3079d2 --- /dev/null +++ b/src/libmawk/print_dummy.c @@ -0,0 +1,31 @@ +/******************************************** +print_dummy.c + +libmawk changes (C) 2013, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
/* Placeholder implementations of the print builtins.  Each of them has
   the same signature as the real builtin but does nothing except abort
   the process when called.
   NOTE(review): presumably linked in place of print.c in builds that
   must not contain the printing code - confirm with the build files. */

/* print: aborts unconditionally; never returns a stack pointer */
mawk_cell_t *mawk_bi_print(mawk_state_t *MAWK, mawk_cell_t *sp)
{
	abort();
}

/* printf: aborts unconditionally; never returns a stack pointer */
mawk_cell_t *mawk_bi_printf(mawk_state_t *MAWK, register mawk_cell_t *sp)
{
	abort();
}

/* sprintf: aborts unconditionally; never returns a stack pointer */
mawk_cell_t *mawk_bi_sprintf(mawk_state_t *MAWK, mawk_cell_t *sp)
{
	abort();
}
+ Search a list of pre-compiled strings first +*/ +PTR mawk_re_compile(mawk_state_t *MAWK, mawk_string_t *sval) +{ + register RE_NODE *p; + RE_NODE *q; + char *s; + + /* search list */ + s = sval->str; + p = MAWK->re_list; + q = (RE_NODE *) 0; + while (p) { + if (strcmp(s, p->sval->str) == 0) { /* found */ + if (!q) /* already at front */ + goto _return; + else { /* delete from list for move to front */ + + q->link = p->link; + goto found; + } + + } + else { + q = p; + p = p->link; + } + } + + /* not found */ + p = MAWK_ZMALLOC(MAWK, RE_NODE); + p->sval = sval; + + sval->ref_cnt++; + if (!(p->re = mawk_REcompile(MAWK, s))) { + if (MAWK->mawk_state == EXECUTION) + mawk_rt_error(MAWK, efmt, mawk_REerrlist[MAWK->REerrno], s); + else { /* compiling */ + + mawk_compile_error(MAWK, efmt, mawk_REerrlist[MAWK->REerrno], s); + return (PTR) 0; + } + } + + +found: +/* mawk_insert p at the front of the list */ + p->link = MAWK->re_list; + MAWK->re_list = p; + +_return: + +#if 0 +#ifdef DEBUG + if (MAWK->dump_RE) + mawk_REmprint(p->re, stderr); +#endif +#endif + return p->re; +} + + + +/* this is only used by mawk_da() */ + + +char *mawk_re_uncompile(mawk_state_t *MAWK, PTR m) +{ + register RE_NODE *p; + + for (p = MAWK->re_list; p; p = p->link) + if (p->re == m) + return p->sval->str; +#ifdef DEBUG + mawk_bozo(MAWK, "non compiled machine"); +#endif + return NULL; +} + + + +/*=================================================*/ +/* replacement operations */ + +/* create a replacement mawk_cell_t from a mawk_string_t * */ + +static mawk_cell_t *REPL_compile(mawk_state_t *MAWK, mawk_string_t *sval) +{ + int i = 0; + register char *p = sval->str; + register char *q; + char *xbuff; + mawk_cell_t *cp; + + q = xbuff = (char *) mawk_zmalloc(MAWK, sval->len + 1); + + while (1) { + switch (*p) { + case 0: + *q = 0; + goto done; + + case '\\': + if (p[1] == '&' || p[1] == '\\') { + *q++ = p[1]; + p += 2; + continue; + } + else + break; + + case '&': + /* if empty we don't need to make a 
node */ + if (q != xbuff) { + *q = 0; + split_buff[i++] = mawk_new_STRING(MAWK, xbuff); + } + /* and a null node for the '&' */ + split_buff[i++] = (mawk_string_t *) 0; + /* reset */ + p++; + q = xbuff; + continue; + + default: + break; + } + + *q++ = *p++; + } + +done: + /* if we have one empty string it will get made now */ + if (q > xbuff || i == 0) + split_buff[i++] = mawk_new_STRING(MAWK, xbuff); + + /* This will never happen */ + if (i > MAX_SPLIT) + mawk_overflow(MAWK, "replacement pieces", MAX_SPLIT); + + cp = MAWK_ZMALLOC(MAWK, mawk_cell_t); + if (i == 1 && split_buff[0]) { + cp->type = C_REPL; + cp->ptr = (PTR) split_buff[0]; + } + else { + mawk_string_t **sp = (mawk_string_t **) + (cp->ptr = mawk_zmalloc(MAWK, sizeof(mawk_string_t *) * i)); + int j = 0; + + while (j < i) + *sp++ = split_buff[j++]; + + cp->type = C_REPLV; + cp->d.vcnt = i; + } + mawk_zfree(MAWK, xbuff, sval->len + 1); + return cp; +} + +/* free memory used by a replacement mawk_cell_t */ + +void mawk_repl_destroy(mawk_state_t *MAWK, register mawk_cell_t *cp) +{ + register mawk_string_t **p; + unsigned cnt; + + if (cp->type == C_REPL) + free_STRING(string(cp)); + else { /* an C_REPLV */ + + p = (mawk_string_t **) cp->ptr; + for (cnt = cp->d.vcnt; cnt; cnt--) { + if (*p) { + free_STRING(*p); + } + p++; + } + mawk_zfree(MAWK, cp->ptr, cp->d.vcnt * sizeof(mawk_string_t *)); + } +} + +/* copy a C_REPLV cell to another mawk_cell_t */ + +mawk_cell_t *mawk_replv_cpy(mawk_state_t *MAWK, mawk_cell_t *target, const mawk_cell_t *source) +{ + mawk_string_t **t, **s; + unsigned cnt; + + target->type = C_REPLV; + cnt = target->d.vcnt = source->d.vcnt; + target->ptr = (PTR) mawk_zmalloc(MAWK, cnt * sizeof(mawk_string_t *)); + + t = (mawk_string_t **) target->ptr; + s = (mawk_string_t **) source->ptr; + while (cnt) { + cnt--; + if (*s) + (*s)->ref_cnt++; + *t++ = *s++; + } + return target; +} + + +/* search the list (with move to the front) for a compiled + separator. 
+ return a ptr to a mawk_cell_t (C_REPL or C_REPLV) +*/ + +mawk_cell_t *mawk_repl_compile(mawk_state_t *MAWK, mawk_string_t *sval) +{ + register REPL_NODE *p; + REPL_NODE *q; + char *s; + + /* search the list */ + s = sval->str; + p = MAWK->repl_list; + q = (REPL_NODE *) 0; + while (p) { + if (strcmp(s, p->sval->str) == 0) { /* found */ + if (!q) /* already at front */ + return p->cp; + else { /* delete from list for move to front */ + + q->link = p->link; + goto found; + } + + } + else { + q = p; + p = p->link; + } + } + + /* not found */ + p = MAWK_ZMALLOC(MAWK, REPL_NODE); + p->sval = sval; + sval->ref_cnt++; + p->cp = REPL_compile(MAWK, sval); + +found: +/* mawk_insert p at the front of the list */ + p->link = MAWK->repl_list; + MAWK->repl_list = p; + return p->cp; +} + +/* return the string for a mawk_cell_t or type REPL or REPLV, + this is only used by mawk_da() */ + + +char *mawk_repl_uncompile(mawk_state_t *MAWK, mawk_cell_t *cp) +{ + register REPL_NODE *p = MAWK->repl_list; + + if (cp->type == C_REPL) { + while (p) { + if (p->cp->type == C_REPL && p->cp->ptr == cp->ptr) + return p->sval->str; + else + p = p->link; + } + } + else { + while (p) { + if (p->cp->type == C_REPLV && memcmp(cp->ptr, p->cp->ptr, cp->d.vcnt * sizeof(mawk_string_t *)) + == 0) + return p->sval->str; + else + p = p->link; + } + } + +#if DEBUG + mawk_bozo(MAWK, "unable to uncompile an repl"); +#endif + return NULL; +} + +/* + convert a C_REPLV to C_REPL + replacing the &s with sval +*/ + +mawk_cell_t *mawk_replv_to_repl(mawk_state_t *MAWK, mawk_cell_t *cp, mawk_string_t *sval) +{ + register mawk_string_t **p; + mawk_string_t **sblock = (mawk_string_t **) cp->ptr; + unsigned cnt, vcnt = cp->d.vcnt; + unsigned len; + char *target; + +#ifdef DEBUG + if (cp->type != C_REPLV) + mawk_bozo(MAWK, "not replv"); +#endif + + p = sblock; + cnt = vcnt; + len = 0; + while (cnt--) { + if (*p) + len += (*p++)->len; + else { + *p++ = sval; + sval->ref_cnt++; + len += sval->len; + } + } + cp->type = 
C_REPL; + cp->ptr = (PTR) mawk_new_STRING0(MAWK, len); + + p = sblock; + cnt = vcnt; + target = string(cp)->str; + while (cnt--) { + memcpy(target, (*p)->str, (*p)->len); + target += (*p)->len; + free_STRING(*p); + p++; + } + + mawk_zfree(MAWK, sblock, vcnt * sizeof(mawk_string_t *)); + return cp; +} diff --git a/src/libmawk/regexp.h b/src/libmawk/regexp.h new file mode 100644 index 0000000..db78bd5 --- /dev/null +++ b/src/libmawk/regexp.h @@ -0,0 +1,26 @@ + +/******************************************** +regexp.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + + +PTR mawk_REcompile(mawk_state_t *MAWK, char *); +int mawk_REtest(mawk_state_t *MAWK, char *, PTR); +char *mawk_REmatch(mawk_state_t *MAWK, char *, PTR, unsigned *, int); +#ifdef DEBUG +#include +void mawk_REmprint(PTR m, FILE *f); +#endif + +extern const char *mawk_REerrlist[]; diff --git a/src/libmawk/regression/Makefile b/src/libmawk/regression/Makefile new file mode 100644 index 0000000..23d4485 --- /dev/null +++ b/src/libmawk/regression/Makefile @@ -0,0 +1,82 @@ +# Run regression tests of all kind +# make: run all tests until the first error +# make -j8: same, parallel on 8 threads +# make decl.dab run binary save/load test on decl.awk (works on any .awk file anywhere) +# make decl.diff run decl awk test - works on any ./*.awk +# make decl.out run decl awk test, save output don't compare - works on any ./*.awk +# make stage1.diff run zfifo test stage1.zf +# make Makefile.dabs update da bin test list +# make fpe.out run the FPE (floating-point-exception) tests + +LMAWK=../lmawk +ZFTEST=zfifo/zfifo_test + +REF_AWK=mawk +IN=test.in + +# awk tests with reference 
output
+TESTS = io_print1.diff io_printf.diff \
+	nan_if.diff nan_ops.diff nan_io.diff nan_isnan.diff \
+	math_func.diff math_fmod.diff arr_orig.diff arr_order.diff \
+	getln_nul.diff valueof.diff wc.diff reg012.diff wfrq0.diff \
+	decl.diff fpe.diff re_test.diff call.diff acall.diff \
+	stack_grow.diff
+
+# zfifo tests
+ZFTESTS = stage1.diff stage2.diff large_writes.diff small_reads.diff
+
+# da_bin tests
+include Makefile.dabs
+
+.SUFFIXES: .diff .out .ref .awk .zf .dab
+
+# the .zf.out rule dependency on ZFTEST doesn't really work, so build it first; sub-makes go through $(MAKE) so that -j and command line variables are passed on
+all: $(ZFTEST)
+	$(MAKE) all_
+
+all_: $(TESTS) $(ZFTESTS) $(DABTESTS)
+	@echo "*** QC PASS ***"
+
+.awk.out: $(LMAWK)
+	-$(LMAWK) -f $*.awk < $(IN) > $@ 2>&1; true
+
+.out.diff: $*.ref
+	@diff -u $*.ref $*.out
+	@rm $*.out
+
+#.awk.ref:
+#	$(REF_AWK) -f $*.awk < $(IN) > $@ 2>&1 ; true
+
+### non-awk generic tests ###
+.zf.out: $(ZFTEST)
+	$(ZFTEST) < $*.zf > $@ 2>&1; true
+
+$(ZFTEST): ../zfifo.h ../zfifo.c zfifo/zfifo_test.c
+	cd zfifo && $(MAKE)
+
+.awk.dab: $(LMAWK) ./da_bin_test.sh
+	LMAWK=$(LMAWK) ./da_bin_test.sh $*.awk
+
+Makefile.dabs:
+	@echo "Generating Makefile.dabs..."
+	@echo -n "DABTESTS=" > $@
+	@ls *.awk ../examples/*.awk | sed "s/.awk$$/.dab\\\\/" >>$@
+	@echo "" >>$@
+
+
+### custom tests ###
+fpe.out: fpe/fpetest1.awk fpe/fpetest2.awk fpe/fpetest3.awk
+	@echo "-> fpe test"
+	@-$(LMAWK) -f fpe/fpetest1.awk >/dev/null 2>&1; r1=$$?; \
+	 $(LMAWK) -f fpe/fpetest2.awk >/dev/null 2>&1; r2=$$?; \
+	 $(LMAWK) -f fpe/fpetest3.awk 2>/dev/null > fpe/fpetest3.tmp; r3=$$?; \
+	 $(LMAWK) -f fpe/eval.awk -v "r1=$$r1" -v "r2=$$r2" -v "r3=$$r3" < fpe/fpetest3.tmp > $@
+
+re_test.out: re_test/re_test
+	@echo "-> re_test"
+	@re_test/re_test >$@
+
+clean:
+	-rm *.out re_test/*.o re_test/re_test zfifo/*.o zfifo/zfifo_test 2>/dev/null ; true
+
+FORCE:
diff --git a/src/libmawk/regression/Makefile.dabs b/src/libmawk/regression/Makefile.dabs
new file mode 100644
index 0000000..e7013a2
--- /dev/null
+++ b/src/libmawk/regression/Makefile.dabs
@@ -0,0 +1,24 @@
+DABTESTS=arr_order.dab\
+arr_orig.dab\
+decl.dab\
+getln_nul.dab\
+io_print1.dab\
+io_printf.dab\
+math_fmod.dab\
+math_func.dab\
+nan_if.dab\
+nan_io.dab\
+nan_isnan.dab\
+nan_ops.dab\
+reg012.dab\
+valueof.dab\
+wc.dab\
+wfrq0.dab\
+../examples/ct_length.dab\
+../examples/decl.dab\
+../examples/deps.dab\
+../examples/eatc.dab\
+../examples/gdecl.dab\
+../examples/nocomment.dab\
+../examples/primes.dab\
+../examples/qsort.dab\
diff --git a/src/libmawk/regression/acall.awk b/src/libmawk/regression/acall.awk
new file mode 100644
index 0000000..2d0f8c7
--- /dev/null
+++ b/src/libmawk/regression/acall.awk
@@ -0,0 +1,13 @@
+BEGIN {
+	ARG[1] = 3
+	ARG[2] = 9
+	ARG[3] = "haha"
+	c = acall("f1", "ARG")
+	print c
+}
+
+function f1(a, b, s ,tmp)
+{
+	tmp = s "=" a*b
+	return tmp
+}
diff --git a/src/libmawk/regression/acall.ref b/src/libmawk/regression/acall.ref
new file mode 100644
index 0000000..c6539ae
--- /dev/null
+++ b/src/libmawk/regression/acall.ref
@@ -0,0 +1 @@
+haha=27
diff --git a/src/libmawk/regression/arr_order.awk b/src/libmawk/regression/arr_order.awk
new file mode 100644
index
0000000..c678447 --- /dev/null +++ b/src/libmawk/regression/arr_order.awk @@ -0,0 +1,19 @@ +# test an UB: posix doesn't tell the order of eval +# intuitively right side should happen first so that the left +# side would create A[1] again. +# +# Original mawk did it the other way around, leaving A[1] empty (result: 1) +# gawk 4.0.1 does right side first and returns 7 +# lmawk with the virtualized array support does the same, returns 7 + +function side_effect() +{ + delete A[1] + return 6 +} + +BEGIN { +# UB: + A[1] = side_effect() + print A[1]+1 +} diff --git a/src/libmawk/regression/arr_order.ref b/src/libmawk/regression/arr_order.ref new file mode 100644 index 0000000..7f8f011 --- /dev/null +++ b/src/libmawk/regression/arr_order.ref @@ -0,0 +1 @@ +7 diff --git a/src/libmawk/regression/arr_orig.awk b/src/libmawk/regression/arr_orig.awk new file mode 100644 index 0000000..dd5172a --- /dev/null +++ b/src/libmawk/regression/arr_orig.awk @@ -0,0 +1,17 @@ +# test whether the new array code does all these properly for an orig array +# (array with no side effect) +# This test should have the same result as with mawk or gawk +BEGIN { + A[1] = 5 + A[1] += 2 + A[1] -= 1 + A[1]++ + A[1] *= 6 + A[1]-- + A[1] /= 6 + ++A[1] + A[1] ^= 2 + --A[1] + A[1] %= 765 + print A[1] +} diff --git a/src/libmawk/regression/arr_orig.ref b/src/libmawk/regression/arr_orig.ref new file mode 100644 index 0000000..537687d --- /dev/null +++ b/src/libmawk/regression/arr_orig.ref @@ -0,0 +1 @@ +60.3611 diff --git a/src/libmawk/regression/call.awk b/src/libmawk/regression/call.awk new file mode 100644 index 0000000..4412185 --- /dev/null +++ b/src/libmawk/regression/call.awk @@ -0,0 +1,10 @@ +BEGIN { + c = call("f1", 3, 9, "haha") + print c +} + +function f1(a, b, s ,tmp) +{ + tmp = s "=" a*b + return tmp +} diff --git a/src/libmawk/regression/call.ref b/src/libmawk/regression/call.ref new file mode 100644 index 0000000..c6539ae --- /dev/null +++ b/src/libmawk/regression/call.ref @@ -0,0 +1 @@ +haha=27 
diff --git a/src/libmawk/regression/da_bin_test.sh b/src/libmawk/regression/da_bin_test.sh
new file mode 100755
index 0000000..1153d83
--- /dev/null
+++ b/src/libmawk/regression/da_bin_test.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+# bin dump round trip test: on all known scripts
+# - dump asm
+# - compile to binary, load the binary and dump asm
+# - compare the two asms
+# Asms shall match for any script (except for pointer values).
+#
+# Usage: LMAWK=path/to/lmawk ./da_bin_test.sh script.awk
+# Exit status: 0 if the two dumps match, non-zero otherwise. On failure the
+# intermediate files are left next to the script for inspection.
+
+# replace pointers with <>
+ptr_filt()
+{
+	sed "s/<<[x0-9a-fA-F]*>>/<>/g"
+}
+
+# dump the asm of a script with pointer values filtered out
+#  $1: output file name; the rest: lmawk arguments selecting the input
+# The raw dump goes through a temporary file instead of a pipe: the exit
+# status of a pipeline is the status of sed, which would hide a failing lmawk.
+# $LMAWK is expanded unquoted on purpose so it may carry extra arguments.
+dump_asm()
+{
+	dump_out=$1
+	shift
+	$LMAWK -Wdump "$@" > "$dump_out.raw" || return 1
+	ptr_filt < "$dump_out.raw" > "$dump_out" || return 1
+	rm -f "$dump_out.raw"
+}
+
+# the test procedure for awk source $1
+bintest()
+{
+	dump_asm "$1.orig" -f "$1" || return 1
+	$LMAWK -Wcompile -f "$1" > "$1.bin" || return 1
+	dump_asm "$1.new" -b "$1.bin" || return 1
+	diff "$1.orig" "$1.new" || return 1
+	rm "$1.orig" "$1.new" "$1.bin"
+}
+
+if [ $# -ne 1 ] || [ -z "$LMAWK" ]
+then
+	echo "Usage: LMAWK=path/to/lmawk $0 script.awk" >&2
+	exit 2
+fi
+
+bintest "$1"
diff --git a/src/libmawk/regression/decl.awk b/src/libmawk/regression/decl.awk
new file mode 100644
index 0000000..5460539
--- /dev/null
+++ b/src/libmawk/regression/decl.awk
@@ -0,0 +1,143 @@
+
+# parse a C declaration by recursive descent
+# based on a C program in KR ANSI edition
+#
+# run on a C file it finds the declarations
+#
+# restrictions: one declaration per line
+# doesn't understand struct {...}
+# makes assumptions about type names
+#
+#
+# some awks need double escapes on strings used as
+# regular expressions. If not run on mawk, use gdecl.awk
+
+
+################################################
+# lexical scanner -- gobble()
+# input : string s -- treated as a regular expression
+# gobble eats SPACE, then eats longest match of s off front
+# of global variable line.
+# Cuts the matched part off of line +# + + +function gobble(s, x) +{ + sub( /^ /, "", line) # eat SPACE if any + + # surround s with parenthesis to make sure ^ acts on the + # whole thing + + match(line, "^" "(" s ")") + x = substr(line, 1, RLENGTH) + line = substr(line, RLENGTH+1) + return x +} + + +function ptr_to(n, x) # print "pointer to" , n times +{ n = int(n) + if ( n <= 0 ) return "" + x = "pointer to" ; n-- + while ( n-- ) x = x " pointer to" + return x +} + + +#recursively get a decl +# returns an english description of the declaration or +# "" if not a C declaration. + +function decl( x, t, ptr_part) +{ + + x = gobble("[* ]+") # get list of *** ... + gsub(/ /, "", x) # remove all SPACES + ptr_part = ptr_to( length(x) ) + + # We expect to see either an identifier or '(' + # + + if ( gobble("\(") ) + { + # this is the recursive descent part + # we expect to match a declaration and closing ')' + # If not return "" to indicate failure + + if ( (x = decl()) == "" || gobble( "\)" ) == "" ) return "" + + } + else # expecting an identifier + { + if ( (x = gobble(id)) == "" ) return "" + x = x ":" + } + + # finally look for () + # or [ opt_size ] + + while ( 1 ) + if ( gobble( funct_mark ) ) x = x " function returning" + else + if ( t = gobble( array_mark ) ) + { gsub(/ /, "", t) + x = x " array" t " of" + } + else break + + + x = x " " ptr_part + return x +} + + +BEGIN { id = "[_A-Za-z][_A-Za-z0-9]*" + funct_mark = "\([ \t]*\)" + array_mark = "\[[ \t]*[_A-Za-z0-9]*[ \t]*\]" + +# I've assumed types are keywords or all CAPS or end in _t +# Other conventions could be added. 
+ + type0 = "int|char|short|long|double|float|void" + type1 = "[_A-Z][_A-Z0-9]*" # types are CAPS + type2 = "[_A-Za-z][_A-Za-z0-9]*_t" # end in _t + + types = "(" type0 "|" type1 "|" type2 ")" +} + + +{ + + gsub( "/\*([^*]|\*[^/])*(\*/|$)" , " ") # remove comments + gsub( /[ \t]+/, " ") # squeeze white space to a single space + + + line = $0 + + scope = gobble( "extern|static" ) + + if ( type = gobble("(struct|union|enum) ") ) + type = type gobble(id) # get the tag + else + { + + type = gobble("(un)?signed ") gobble( types ) + + } + + if ( ! type ) next + + if ( (x = decl()) && gobble( ";") ) + { + x = x " " type + if ( scope ) x = x " (" scope ")" + gsub( / +/, " ", x) # + print x + } + +} + + + + diff --git a/src/libmawk/regression/decl.ref b/src/libmawk/regression/decl.ref new file mode 100644 index 0000000..cd45e90 --- /dev/null +++ b/src/libmawk/regression/decl.ref @@ -0,0 +1,10 @@ +hash: function returning unsigned (extern) +last_dhash: unsigned (static) +A: ARRAY +sval: pointer to STRING +cflag: int +A: ARRAY +d: double +cflag: int +ap: pointer to ANODE +signal: function returning pointer to function returning void diff --git a/src/libmawk/regression/fpe/eval.awk b/src/libmawk/regression/fpe/eval.awk new file mode 100644 index 0000000..54d2e78 --- /dev/null +++ b/src/libmawk/regression/fpe/eval.awk @@ -0,0 +1,26 @@ +# check output for nans +/[nN][aA][nN]|[?]/ { nan++ } + +END { + if (r1 > 128) print "test1 failed" + if (r2 > 128) print "test2 failed" + if (r3 > 128) print "test3 failed" + +# return values should all be 0 if ignoring FPEs (e.g. 
with IEEE754) +# or all 2 if trapping FPEs + if ((r1 == r2) && (r2 == r3)) { + print "consistent FPE results: all ", r1 + if (r1 == 0) + print "style: ignoring floating exceptions" + else + print "style: trapping floating exceptions" + } + else + print "inconsistent FPE results: ", r1, r2, r3 + +# test3 must print nan + if (!nan) { + print "but the library is not IEEE754 compatible" + print "test 3 failed" + } +} diff --git a/src/libmawk/regression/fpe/fpetest1.awk b/src/libmawk/regression/fpe/fpetest1.awk new file mode 100644 index 0000000..4fcfb4d --- /dev/null +++ b/src/libmawk/regression/fpe/fpetest1.awk @@ -0,0 +1 @@ +BEGIN{ print 4/0 } diff --git a/src/libmawk/regression/fpe/fpetest2.awk b/src/libmawk/regression/fpe/fpetest2.awk new file mode 100644 index 0000000..c9aaccf --- /dev/null +++ b/src/libmawk/regression/fpe/fpetest2.awk @@ -0,0 +1,5 @@ +BEGIN { + x = 100 + do { y = x ; x *= 1000 } while ( y != x ) + print "loop terminated" +} diff --git a/src/libmawk/regression/fpe/fpetest3.awk b/src/libmawk/regression/fpe/fpetest3.awk new file mode 100644 index 0000000..8246bc3 --- /dev/null +++ b/src/libmawk/regression/fpe/fpetest3.awk @@ -0,0 +1 @@ +BEGIN{ print log(-8) } diff --git a/src/libmawk/regression/fpe.ref b/src/libmawk/regression/fpe.ref new file mode 100644 index 0000000..7f91987 --- /dev/null +++ b/src/libmawk/regression/fpe.ref @@ -0,0 +1,2 @@ +consistent FPE results: all 0 +style: ignoring floating exceptions diff --git a/src/libmawk/regression/getln_nul.awk b/src/libmawk/regression/getln_nul.awk new file mode 100644 index 0000000..0fbde41 --- /dev/null +++ b/src/libmawk/regression/getln_nul.awk @@ -0,0 +1,4 @@ +BEGIN { + res=(getline < "") + print "getline=" res , "\"" $0 "\"" +} diff --git a/src/libmawk/regression/getln_nul.ref b/src/libmawk/regression/getln_nul.ref new file mode 100644 index 0000000..b1536ef --- /dev/null +++ b/src/libmawk/regression/getln_nul.ref @@ -0,0 +1 @@ +getline=-1 "" diff --git a/src/libmawk/regression/io_print1.awk 
b/src/libmawk/regression/io_print1.awk new file mode 100644 index 0000000..92a00d8 --- /dev/null +++ b/src/libmawk/regression/io_print1.awk @@ -0,0 +1,8 @@ +BEGIN { + a="hello world" + print "str:", a + a=1 + print "int:", a + a=1.234 + print "float:", a +} \ No newline at end of file diff --git a/src/libmawk/regression/io_print1.ref b/src/libmawk/regression/io_print1.ref new file mode 100644 index 0000000..0db0811 --- /dev/null +++ b/src/libmawk/regression/io_print1.ref @@ -0,0 +1,3 @@ +str: hello world +int: 1 +float: 1.234 diff --git a/src/libmawk/regression/io_printf.awk b/src/libmawk/regression/io_printf.awk new file mode 100644 index 0000000..fa6070c --- /dev/null +++ b/src/libmawk/regression/io_printf.awk @@ -0,0 +1,9 @@ +BEGIN { + a="hello world" + printf("str: %s\n", a); + a=1 + printf("int1: %d\n", a); + a=1.234 + printf("int2: %d\n", a); + printf("float: %f\n", a); +} \ No newline at end of file diff --git a/src/libmawk/regression/io_printf.ref b/src/libmawk/regression/io_printf.ref new file mode 100644 index 0000000..9e0d594 --- /dev/null +++ b/src/libmawk/regression/io_printf.ref @@ -0,0 +1,4 @@ +str: hello world +int1: 1 +int2: 1 +float: 1.234000 diff --git a/src/libmawk/regression/large_writes.ref b/src/libmawk/regression/large_writes.ref new file mode 100644 index 0000000..3312faa --- /dev/null +++ b/src/libmawk/regression/large_writes.ref @@ -0,0 +1,10 @@ +>alloc: 0 +>write: 80 'abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12' +>write: 80 'abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12' +>dump + Size: 160/0 + blk 080 from 000 'abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12' + ^--readp + blk 080 from 000 'abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12' + ^--readp + stg 0 '' diff --git a/src/libmawk/regression/large_writes.zf b/src/libmawk/regression/large_writes.zf new file mode 100644 index 0000000..4c44a40 --- 
/dev/null
+++ b/src/libmawk/regression/large_writes.zf
@@ -0,0 +1,5 @@
+alloc
+# write 2x80 chars
+write abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12
+write abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12
+dump
diff --git a/src/libmawk/regression/libmawk/Makefile b/src/libmawk/regression/libmawk/Makefile
new file mode 100644
index 0000000..ae1ee36
--- /dev/null
+++ b/src/libmawk/regression/libmawk/Makefile
@@ -0,0 +1,31 @@
+ROOT=../../..
+
+all: app
+
+include $(ROOT)/libmawk/Makefile.conf
+# always re-enter the library dir so libmawk.a is up to date; $(MAKE) keeps
+# -j and command line variables working in the sub-make
+$(ROOT)/libmawk/libmawk.a: FORCE
+	cd $(ROOT)/libmawk && $(MAKE) libmawk.a
+
+
+OBJS = func_call.o
+CFLAGS = -I$(ROOT)
+
+# the test executable is called app (see all:)
+app: $(OBJS) $(ROOT)/libmawk/libmawk.a
+	$(CC) $(LDFLAGS) $(OBJS) $(ROOT)/libmawk/libmawk.a -o $@ $(MATHLIB)
+
+func_call.o: func_call.c
+
+# build and run the test on test.awk
+run: app
+	./app -f test.awk
+
+clean:
+	rm -f $(OBJS) app func_call
+
+distclean: clean
+
+
+FORCE:
diff --git a/src/libmawk/regression/libmawk/func_call.c b/src/libmawk/regression/libmawk/func_call.c
new file mode 100644
index 0000000..62965c4
--- /dev/null
+++ b/src/libmawk/regression/libmawk/func_call.c
@@ -0,0 +1,28 @@
+#include
+#include
+
+int main(int argc, char **argv)
+{
+	mawk_state_t *m;
+	mawk_cell_t ret = libmawk_empty_cell;
+
+	/* init a context, execute BEGIN */
+	m = libmawk_initialize(argc, argv);
+	if (m == NULL) {
+		fprintf(stderr, "libmawk_initialize failed, exiting\n");
+		return 1;
+	}
+
+	if (libmawk_call_function(m, "foo", &ret, "dfsQ", (int)42, (double)1.234, (char *)"test string1.", 1) == MAWK_EXER_FUNCRET) {
+		char buff[32];
+		printf("app: error: function retuned; return value of foo is '%s'\n", libmawk_print_cell(m, &ret, buff, sizeof(buff)));
+		libmawk_cell_destroy(m, &ret);
+	}
+	else {
+		printf("app: OK: function foo didn't return\n");
+	}
+
+	libmawk_uninitialize(m);
+	return 0;
+}
+
diff --git a/src/libmawk/regression/libmawk/test.awk b/src/libmawk/regression/libmawk/test.awk
new file mode 100644
index
0000000..8c37e7d --- /dev/null +++ b/src/libmawk/regression/libmawk/test.awk @@ -0,0 +1,9 @@ +function foo(a, b, c) +{ + print "script: foo(" a "," b "," c ")" + return a+b +} + +BEGIN { print "script: BEGIN" } + { print "script: input: \"" $0 "\"" } +END { print "script: END" } diff --git a/src/libmawk/regression/math_fmod.awk b/src/libmawk/regression/math_fmod.awk new file mode 100644 index 0000000..8498865 --- /dev/null +++ b/src/libmawk/regression/math_fmod.awk @@ -0,0 +1,6 @@ +BEGIN { + A[1]=200000000 + for(n = 0; n < 1.00000; n+=1) + A[1] %= 345345 + print A[1] +} diff --git a/src/libmawk/regression/math_fmod.ref b/src/libmawk/regression/math_fmod.ref new file mode 100644 index 0000000..be1f02b --- /dev/null +++ b/src/libmawk/regression/math_fmod.ref @@ -0,0 +1 @@ +45245 diff --git a/src/libmawk/regression/math_func.awk b/src/libmawk/regression/math_func.awk new file mode 100644 index 0000000..9d7b5f1 --- /dev/null +++ b/src/libmawk/regression/math_func.awk @@ -0,0 +1,12 @@ +BEGIN { + nan="nan"+0 + print "atan2", atan2(nan, nan), atan2(1, nan), atan2(nan, 1), atan2(1, 0) + print "log ", log(nan), log(0), log(-1) + print "sqrt ", sqrt(nan), sqrt(0), sqrt(-1) + print "exp ", exp(nan) + print "sin ", sin(nan) + print "cos ", sin(nan) + print "tan ", sin(nan) + print "int ", int(nan) + print "pow ", (nan ^ nan), (1 ^ nan), (nan ^ 1), ((-1) ^ 1.2) +} diff --git a/src/libmawk/regression/math_func.ref b/src/libmawk/regression/math_func.ref new file mode 100644 index 0000000..1517fbc --- /dev/null +++ b/src/libmawk/regression/math_func.ref @@ -0,0 +1,9 @@ +atan2 nan nan nan 1.5708 +log nan nan nan +sqrt nan 0 nan +exp nan +sin nan +cos nan +tan nan +int nan +pow nan nan nan nan diff --git a/src/libmawk/regression/mawktest.dat b/src/libmawk/regression/mawktest.dat new file mode 100644 index 0000000..e4e0007 --- /dev/null +++ b/src/libmawk/regression/mawktest.dat @@ -0,0 +1,107 @@ + +#include + +extern unsigned hash() ; + +/* An array A is a pointer to an array of 
struct array, + which is two hash tables in one. One for strings + and one for doubles. + + each array is of size A_HASH_PRIME. + + When an index is deleted via delete A[i], the + ANODE is not removed from the hash chain. A[i].cp + and A[i].sval are both freed and sval is set NULL. + This method of deletion simplifies for( i in A ) loops. + + On the D_ANODE list, we use real deletion and move to the + front on access. + + Separate nodes (as opposed to one type of node on two lists) + to + (1) d1 != d2, but sprintf(A_FMT,d1) == sprintf(A_FMT,d1) + so two dnodes can point at the same anode. + (2) Save a little data space(64K PC mentality). + + the cost is an extra level of indirection. + + Some care is needed so that things like + A[1] = 2 ; delete A["1"] work . +*/ + +#define _dhash(d) (((int)(d)&0x7fff)%A_HASH_PRIME) +#define DHASH(d) (last_dhash=_dhash(d)) +static unsigned last_dhash ; + +/* switch =======;;;;;;hhhh */ + +static ANODE *find_by_sval(A, sval, cflag) + ARRAY A ; + STRING *sval ; + int cflag ; /* create if on */ +{ + char *s = sval->str ; + unsigned h = hash(s) % A_HASH_PRIME ; + register ANODE *p = A[h].link ; + ANODE *q = 0 ; /* holds first deleted ANODE */ + + while ( p ) + { + if ( p->sval ) + { if ( strcmp(s,p->sval->str) == 0 ) return p ; } + else /* its deleted, mark with q */ + if ( ! 
q ) q = p ; + + p = p->link ; + } + + /* not there */ + if ( cflag ) + { + if ( q ) p = q ; /* reuse the deleted node q */ + else + { p = (ANODE *)zmalloc(sizeof(ANODE)) ; + p->link = A[h].link ; A[h].link = p ; + } + + p->sval = sval ; + sval->ref_cnt++ ; + p->cp = (CELL *) zmalloc(sizeof(CELL)) ; + p->cp->type = C_NOINIT ; + } + return p ; +} + + +/* on the D_ANODE list, when we find a node we move it + to the front of the hash chain */ + +static D_ANODE *find_by_dval(A, d, cflag) + ARRAY A ; + double d ; + int cflag ; +{ + unsigned h = DHASH(d) ; + register D_ANODE *p = A[h].dlink ; + D_ANODE *q = 0 ; /* trails p for move to front */ + ANODE *ap ; + + while ( p ) + if ( p->dval == d ) + { /* found */ + if ( ! p->ap->sval ) /* but it was deleted by string */ + { if ( q ) q->dlink = p->dlink ; + else A[h].dlink = p->dlink ; + zfree(p, sizeof(D_ANODE)) ; + break ; + } + /* found */ + if ( !q ) return p ; /* already at front */ + else /* delete to put at front */ + { q->dlink = p->dlink ; goto found ; } + } + else + { q = p ; p = p->dlink ; } + +void (*signal())() ; + diff --git a/src/libmawk/regression/nan_if.awk b/src/libmawk/regression/nan_if.awk new file mode 100644 index 0000000..d83c783 --- /dev/null +++ b/src/libmawk/regression/nan_if.awk @@ -0,0 +1,10 @@ +# should throw a runtime error instead +BEGIN { + nan=log(-1) + if (nan) + print "then" + else + print "else" + print "after" +} + diff --git a/src/libmawk/regression/nan_if.ref b/src/libmawk/regression/nan_if.ref new file mode 100644 index 0000000..06b7df2 --- /dev/null +++ b/src/libmawk/regression/nan_if.ref @@ -0,0 +1,2 @@ +lmawk: run time error: NaN in conditional jump + FILENAME="" FNR=0 NR=0 diff --git a/src/libmawk/regression/nan_io.awk b/src/libmawk/regression/nan_io.awk new file mode 100644 index 0000000..e353331 --- /dev/null +++ b/src/libmawk/regression/nan_io.awk @@ -0,0 +1,6 @@ +BEGIN { + a="nan"+1 + print a + printf("%f\n", a) + print sprintf("%f", a) +} \ No newline at end of file diff --git 
a/src/libmawk/regression/nan_io.ref b/src/libmawk/regression/nan_io.ref new file mode 100644 index 0000000..10c8c31 --- /dev/null +++ b/src/libmawk/regression/nan_io.ref @@ -0,0 +1,3 @@ +nan +nan +nan diff --git a/src/libmawk/regression/nan_isnan.awk b/src/libmawk/regression/nan_isnan.awk new file mode 100644 index 0000000..9eb4e30 --- /dev/null +++ b/src/libmawk/regression/nan_isnan.awk @@ -0,0 +1,7 @@ +BEGIN { + print isnan(0) + print isnan(1) + print isnan(log(-1)) + print isnan("nan") + print isnan("nan"+1) +} diff --git a/src/libmawk/regression/nan_isnan.ref b/src/libmawk/regression/nan_isnan.ref new file mode 100644 index 0000000..f6ecf65 --- /dev/null +++ b/src/libmawk/regression/nan_isnan.ref @@ -0,0 +1,5 @@ +0 +0 +1 +1 +1 diff --git a/src/libmawk/regression/nan_ops.awk b/src/libmawk/regression/nan_ops.awk new file mode 100644 index 0000000..df76825 --- /dev/null +++ b/src/libmawk/regression/nan_ops.awk @@ -0,0 +1,80 @@ +BEGIN { nan=log(-1) } + +# op=, variables +{ + a = 1 + a += nan + print a + + a = 1 + a -= nan + print a + + a = 1 + a *= nan + print a + + a = 1 + a /= nan + print a +} + +# op=, fields have their on path in the code +{ + $1 = 1 + $1 += nan + print $1 + + $1 = 1 + $1 -= nan + print $1 + + $1 = 1 + $1 *= nan + print $1 + + $1 = 1 + $1 /= nan + print $1 +} + +# inc/dec for vars and fields +{ + a = nan + a++ + print a + + a = nan + a-- + print a + + a = nan + ++a + print a + + a = nan + --a + print a + + $1 = nan + $1++ + print $1 + + $1 = nan + $1-- + print $1 + + $1 = nan + ++$1 + print $1 + + $1 = nan + --$1 + print $1 + +} + +{ + # enough to test once + exit 0 +} diff --git a/src/libmawk/regression/nan_ops.ref b/src/libmawk/regression/nan_ops.ref new file mode 100644 index 0000000..998006e --- /dev/null +++ b/src/libmawk/regression/nan_ops.ref @@ -0,0 +1,16 @@ +nan +nan +nan +nan +nan +nan +nan +nan +nan +nan +nan +nan +nan +nan +nan +nan diff --git a/src/libmawk/regression/re_test/re_test.c b/src/libmawk/regression/re_test/re_test.c new 
file mode 100644 index 0000000..7a5779d --- /dev/null +++ b/src/libmawk/regression/re_test/re_test.c @@ -0,0 +1,111 @@ +#include +#include +#include +#include "vargs.h" +#include "mawk.h" +#include "memory.h" +#include "regexp.h" + +void mawk_exit_(mawk_state_t *MAWK, int x) +{ + fprintf(stderr, "mawk_exit\n"); + exit(1); +} + +char *mawk_str_str(register char *target, char *key, unsigned key_len) +{ + register int k = key[0]; + + switch (key_len) { + case 0: + return (char *) 0; + case 1: + return strchr(target, k); + case 2: + { + int k1 = key[1]; + while ((target = strchr(target, k))) + if (target[1] == k1) + return target; + else + target++; + /*failed */ + return (char *) 0; + } + } + + key_len--; + while ((target = strchr(target, k))) { + if (strncmp(target + 1, key + 1, key_len) == 0) + return target; + else + target++; + } + /*failed */ + return (char *) 0; +} + +void mawk_rt_error VA_ALIST(const char *, format) +{ + va_list args; + + fprintf(stderr, "%s: run time error: ", MAWK->progname); + VA_START(args, char *, format); + vfprintf(stderr, format, args); + va_end(args); + putc('\n', stderr); + exit(1); +} + + +void mawk_compile_error VA_ALIST(const char *, format) +{ + va_list args; + const char *s0, *s1; + + /* with multiple program files put program name in + error message */ + if (MAWK->ps.pfile_name) { + s0 = MAWK->ps.pfile_name; + s1 = ": "; + } + else { + s0 = s1 = ""; + } + + fprintf(stderr, "%s: %s%sline %u: ", MAWK->progname, s0, s1, MAWK->token_lineno); + VA_START(args, char *, format); + vfprintf(stderr, format, args); + va_end(args); + fprintf(stderr, "\n"); + if (++(MAWK->compile_error_count) == MAX_COMPILE_ERRORS) + mawk_exit(MAWK, 2); +} + + +#define test(str) \ + do { \ + PTR p = mawk_REcompile(MAWK, str); \ + printf("%s -> %s %d\n", str, p == NULL ? 
"err" : "ok", MAWK->REerrno); \ + } while(0) + +int main() +{ + mawk_state_t MAWK_, *MAWK = &MAWK_; + + memset(MAWK, 0, sizeof(mawk_state_t)); + + test("foo"); + test(")"); + test("("); + test("[abc"); + test("foo|"); + test("(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)"); + test("^*hah"); + /* TODO: how to cause E7? */ + + + mawk_free_all(MAWK); + + return 0; +} diff --git a/src/libmawk/regression/re_test.ref b/src/libmawk/regression/re_test.ref new file mode 100644 index 0000000..5876163 --- /dev/null +++ b/src/libmawk/regression/re_test.ref @@ -0,0 +1,7 @@ +foo -> ok 0 +) -> err 1 +( -> err 2 +[abc -> err 3 +foo| -> err 4 +(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+) -> err 5 +^*hah -> err 6 diff --git a/src/libmawk/regression/reg012.awk b/src/libmawk/regression/reg012.awk new file mode 100644 index 0000000..ae67cf3 --- /dev/null +++ b/src/libmawk/regression/reg012.awk @@ -0,0 +1,9 @@ +# used to be test/reg[012].awk + +/return/ {cnt[0]++} + +/return|switch/ {cnt[1]++} + +/[A-Za-z_][A-Za-z0-9_]*\[.*\][ \t]*=/ {cnt[2]++} + +END{print "return=" cnt[0], "return|switch=" cnt[1], "array=" cnt[2]} diff --git a/src/libmawk/regression/reg012.ref b/src/libmawk/regression/reg012.ref new file mode 100644 index 0000000..cbf30e5 --- /dev/null +++ b/src/libmawk/regression/reg012.ref @@ -0,0 +1 @@ +return=3 return|switch=4 array=2 diff --git a/src/libmawk/regression/small_reads.ref b/src/libmawk/regression/small_reads.ref new file mode 100644 index 0000000..141caaa --- /dev/null +++ b/src/libmawk/regression/small_reads.ref @@ -0,0 +1,35 @@ +>alloc: 0 +>write: 80 
'abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12' +>write: 80 'abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12' +>write: 4 '9876' +>dump + Size: 164/0 + blk 080 from 000 'abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12' + ^--readp + blk 080 from 000 'abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12' + ^--readp + stg 4 '9876' +>read: 75 'abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvw' +>dump + Size: 89/0 + blk 080 from 075 'abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12' + ^--readp + blk 080 from 000 'abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12' + ^--readp + stg 4 '9876' +>read: 7 'xyz12ab' +>dump + Size: 82/0 + blk 080 from 002 'abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12' + ^--readp + stg 4 '9876' +>read: 75 'cdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxy' +>dump + Size: 7/0 + blk 080 from 077 'abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12' + ^--readp + stg 4 '9876' +>read: 7 'z129876' +>dump + Size: 0/0 + stg 0 '' diff --git a/src/libmawk/regression/small_reads.zf b/src/libmawk/regression/small_reads.zf new file mode 100644 index 0000000..f046f6e --- /dev/null +++ b/src/libmawk/regression/small_reads.zf @@ -0,0 +1,14 @@ +alloc +# write 2x80 chars +write abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12 +write abcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyzabcdefghijklmnopqrtsuvwxyz12 +write 9876 +dump +read 75 +dump +read 7 +dump +read 75 +dump +read 100 +dump diff --git a/src/libmawk/regression/stack_grow.awk b/src/libmawk/regression/stack_grow.awk new file mode 100644 index 0000000..5327b6e --- /dev/null +++ b/src/libmawk/regression/stack_grow.awk @@ -0,0 +1,11 @@ +function rec(depth) +{ + if (depth 
< MAX) + return rec(depth+1) + return "wow at " depth +} + +BEGIN { + MAX=1000 + print rec(0) +} diff --git a/src/libmawk/regression/stack_grow.ref b/src/libmawk/regression/stack_grow.ref new file mode 100644 index 0000000..e13a039 --- /dev/null +++ b/src/libmawk/regression/stack_grow.ref @@ -0,0 +1 @@ +wow at 1000 diff --git a/src/libmawk/regression/stage1.ref b/src/libmawk/regression/stage1.ref new file mode 100644 index 0000000..5a985b5 --- /dev/null +++ b/src/libmawk/regression/stage1.ref @@ -0,0 +1,10 @@ +>alloc: 0 +>write: 1 'a' +>write: 2 'bc' +>write: 3 'def' +>write: 1 'g' +>dump + Size: 7/0 + stg 7 'abcdefg' +>read: 7 'abcdefg' +>free diff --git a/src/libmawk/regression/stage1.zf b/src/libmawk/regression/stage1.zf new file mode 100644 index 0000000..0ba2c1c --- /dev/null +++ b/src/libmawk/regression/stage1.zf @@ -0,0 +1,10 @@ +# a series of small writes that woudl end up in the staging buff +alloc +write a +write bc +write def +write g +dump +read 128 +free + diff --git a/src/libmawk/regression/stage2.ref b/src/libmawk/regression/stage2.ref new file mode 100644 index 0000000..e12f82d --- /dev/null +++ b/src/libmawk/regression/stage2.ref @@ -0,0 +1,23 @@ +>alloc: 0 +>write: 40 '123456789|123456789|123456789|123456789|' +>dump + Size: 40/0 + stg 40 '123456789|123456789|123456789|123456789|' +>write: 20 'abcdefghi|abcdefghi|' +>dump + Size: 60/0 + stg 60 '123456789|123456789|123456789|123456789|abcdefghi|abcdefghi|' +>write: 4 'ABCD' +>dump + Size: 64/0 + blk 064 from 000 '123456789|123456789|123456789|123456789|abcdefghi|abcdefghi|ABCD' + ^--readp + stg 0 '' +>write: 6 '!EFGHI' +>dump + Size: 70/0 + blk 064 from 000 '123456789|123456789|123456789|123456789|abcdefghi|abcdefghi|ABCD' + ^--readp + stg 6 '!EFGHI' +>read: 70 '123456789|123456789|123456789|123456789|abcdefghi|abcdefghi|ABCD!EFGHI' +>free diff --git a/src/libmawk/regression/stage2.zf b/src/libmawk/regression/stage2.zf new file mode 100644 index 0000000..dc72ce6 --- /dev/null +++ 
b/src/libmawk/regression/stage2.zf @@ -0,0 +1,13 @@ +# a series of small writes that woudl end up in the staging buff +alloc +write 123456789|123456789|123456789|123456789| +dump +write abcdefghi|abcdefghi| +dump +write ABCD +dump +write !EFGHI +dump +read 128 +free + diff --git a/src/libmawk/regression/test.in b/src/libmawk/regression/test.in new file mode 100644 index 0000000..37c1e7c --- /dev/null +++ b/src/libmawk/regression/test.in @@ -0,0 +1,119 @@ +1 +22 +333 +4444 + +2) A number of obscure bugs have been fixed such as, + you can now make a recursive function call inside a for[ i in A] =oop. + Function calls with array parameters in loop expressions sometimes + generated erroneous internal code. + + +### original mawk test input ### + +#include + +extern unsigned hash() ; + +/* An array A is a pointer to an array of struct array, + which is two hash tables in one. One for strings + and one for doubles. + + each array is of size A_HASH_PRIME. + + When an index is deleted via delete A[i], the + ANODE is not removed from the hash chain. A[i].cp + and A[i].sval are both freed and sval is set NULL. + This method of deletion simplifies for( i in A ) loops. + + On the D_ANODE list, we use real deletion and move to the + front on access. + + Separate nodes (as opposed to one type of node on two lists) + to + (1) d1 != d2, but sprintf(A_FMT,d1) == sprintf(A_FMT,d1) + so two dnodes can point at the same anode. + (2) Save a little data space(64K PC mentality). + + the cost is an extra level of indirection. + + Some care is needed so that things like + A[1] = 2 ; delete A["1"] work . 
+*/ + +#define _dhash(d) (((int)(d)&0x7fff)%A_HASH_PRIME) +#define DHASH(d) (last_dhash=_dhash(d)) +static unsigned last_dhash ; + +/* switch =======;;;;;;hhhh */ + +static ANODE *find_by_sval(A, sval, cflag) + ARRAY A ; + STRING *sval ; + int cflag ; /* create if on */ +{ + char *s = sval->str ; + unsigned h = hash(s) % A_HASH_PRIME ; + register ANODE *p = A[h].link ; + ANODE *q = 0 ; /* holds first deleted ANODE */ + + while ( p ) + { + if ( p->sval ) + { if ( strcmp(s,p->sval->str) == 0 ) return p ; } + else /* its deleted, mark with q */ + if ( ! q ) q = p ; + + p = p->link ; + } + + /* not there */ + if ( cflag ) + { + if ( q ) p = q ; /* reuse the deleted node q */ + else + { p = (ANODE *)zmalloc(sizeof(ANODE)) ; + p->link = A[h].link ; A[h].link = p ; + } + + p->sval = sval ; + sval->ref_cnt++ ; + p->cp = (CELL *) zmalloc(sizeof(CELL)) ; + p->cp->type = C_NOINIT ; + } + return p ; +} + + +/* on the D_ANODE list, when we find a node we move it + to the front of the hash chain */ + +static D_ANODE *find_by_dval(A, d, cflag) + ARRAY A ; + double d ; + int cflag ; +{ + unsigned h = DHASH(d) ; + register D_ANODE *p = A[h].dlink ; + D_ANODE *q = 0 ; /* trails p for move to front */ + ANODE *ap ; + + while ( p ) + if ( p->dval == d ) + { /* found */ + if ( ! 
p->ap->sval ) /* but it was deleted by string */ + { if ( q ) q->dlink = p->dlink ; + else A[h].dlink = p->dlink ; + zfree(p, sizeof(D_ANODE)) ; + break ; + } + /* found */ + if ( !q ) return p ; /* already at front */ + else /* delete to put at front */ + { q->dlink = p->dlink ; goto found ; } + } + else + { q = p ; p = p->dlink ; } + +void (*signal())() ; + diff --git a/src/libmawk/regression/valueof.awk b/src/libmawk/regression/valueof.awk new file mode 100644 index 0000000..467d8e3 --- /dev/null +++ b/src/libmawk/regression/valueof.awk @@ -0,0 +1,20 @@ +function test(var, idx) +{ + if (idx == "") + print var "=" valueof(var) + else + print var "=" valueof(var, idx) +} + +BEGIN { + string="hello world" + number=42.321 + array[1] = "one" + array[2] = "two" + array["three"] = 3 + test("string") + test("number") + test("array", 1) + test("array", 2) + test("array", "three") +} \ No newline at end of file diff --git a/src/libmawk/regression/valueof.ref b/src/libmawk/regression/valueof.ref new file mode 100644 index 0000000..78de035 --- /dev/null +++ b/src/libmawk/regression/valueof.ref @@ -0,0 +1,5 @@ +string=hello world +number=42.321 +array=one +array=two +array=3 diff --git a/src/libmawk/regression/wc.awk b/src/libmawk/regression/wc.awk new file mode 100644 index 0000000..0875399 --- /dev/null +++ b/src/libmawk/regression/wc.awk @@ -0,0 +1,3 @@ + +{sum += NF} +END{ print NR, sum} diff --git a/src/libmawk/regression/wc.ref b/src/libmawk/regression/wc.ref new file mode 100644 index 0000000..f292804 --- /dev/null +++ b/src/libmawk/regression/wc.ref @@ -0,0 +1 @@ +119 528 diff --git a/src/libmawk/regression/wfrq0.awk b/src/libmawk/regression/wfrq0.awk new file mode 100644 index 0000000..7791e0b --- /dev/null +++ b/src/libmawk/regression/wfrq0.awk @@ -0,0 +1,98 @@ + +# this program finds the twenty most freq +# words in document using a heap sort at the end +# +# + +function down_heap(i, k,hold) +{ + while ( 1 ) + { + if ( compare(heap[2*i], heap[2*i+1]) <= 0 ) k = 
2*i + else k = 2*i + 1 + + if ( compare(heap[i],heap[k]) <= 0 ) return + + hold = heap[k] ; heap[k] = heap[i] ; heap[i] = hold + i = k + } +} + +# compares two values of form "number word" +# by number and breaks ties by word (reversed) + +function compare(s1, s2, t, X) +{ + t = (s1+0) - (s2+0) # forces types to number + + if ( t == 0 ) + { + split(s1, X); s1 = X[2] + split(s2, X); s2 = X[2] + if ( s2 < s1 ) return -1 + return s1 < s2 + } + + return t +} + + +BEGIN { RS = "[^a-zA-Z]+" ; BIG = "999999:" } + +{ cnt[$0]++ } + +END { delete cnt[ "" ] + +# load twenty values +j = 1 +for( i in cnt ) +{ + heap[j] = num_word( cnt[i] , i ) + delete cnt[i] ; + if ( ++j == 21 ) break ; +} + +# make some sentinals +for( i = j ; i < 43 ; i++ ) heap[i] = BIG + +h_empty = j # save the first empty slot +# make a heap with the smallest in slot 1 +for( i = h_empty - 1 ; i > 0 ; i-- ) down_heap(i) + +# examine the rest of the values +for ( i in cnt ) +{ + j = num_word(cnt[i], i) + if ( compare(j, heap[1]) > 0 ) + { # its bigger + # take the smallest out of the heap and readjust + heap[1] = j + down_heap(1) + } +} + +h_empty-- ; + +# what's left are the twenty largest +# smallest at the top +# + +i = 20 +while ( h_empty > 1 ) +{ + buffer[i--] = heap[1] + heap[1] = heap[h_empty] + heap[h_empty] = BIG + down_heap(1) + h_empty-- +} + buffer[i--] = heap[1] + + for(j = 1 ; j <= 20 ; j++ ) print buffer[j] +} + + +function num_word(num, word) +{ + return sprintf("%3d %s", num, word) +} diff --git a/src/libmawk/regression/wfrq0.ref b/src/libmawk/regression/wfrq0.ref new file mode 100644 index 0000000..0e5a6c5 --- /dev/null +++ b/src/libmawk/regression/wfrq0.ref @@ -0,0 +1,20 @@ + 29 p + 23 A + 14 ANODE + 13 q + 12 d + 12 sval + 10 if + 10 the + 8 dlink + 8 h + 8 is + 7 of + 7 to + 6 D + 5 a + 5 array + 5 cflag + 5 deleted + 5 else + 5 for diff --git a/src/libmawk/regression/zfifo/Makefile b/src/libmawk/regression/zfifo/Makefile new file mode 100644 index 0000000..6c37891 --- /dev/null +++ 
b/src/libmawk/regression/zfifo/Makefile @@ -0,0 +1,20 @@ +CFLAGS = -g -I. + +OBJS=zfifo_test.o zfifo.o + +all: zfifo_test + +zfifo_test: $(OBJS) + $(CC) $(OBJS) -o $@ + +zfifo.c zfifo.h: ../../zfifo.c ../../zfifo.h + cp ../../zfifo.c . + cp ../../zfifo.h . + +zfifo.o: zfifo.c zfifo.h + $(CC) -c $(CFLAGS) zfifo.c -o $@ + +zfifo_test.o: zfifo.h + +clean: + rm $(OBJS) zfifo_test zfifo.c zfifo.h 2>/dev/null; true diff --git a/src/libmawk/regression/zfifo/mawk.h b/src/libmawk/regression/zfifo/mawk.h new file mode 100644 index 0000000..da56a24 --- /dev/null +++ b/src/libmawk/regression/zfifo/mawk.h @@ -0,0 +1,5 @@ +#ifndef MAWK_H +#define MAWK_H +typedef struct { int dummy;} mawk_state_t; +#endif + diff --git a/src/libmawk/regression/zfifo/zfifo_test.c b/src/libmawk/regression/zfifo/zfifo_test.c new file mode 100644 index 0000000..39f17c6 --- /dev/null +++ b/src/libmawk/regression/zfifo/zfifo_test.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include +#include "zfifo.h" + + +mawk_state_t *MAWK = NULL; +int main(int argc, char *argv[]) +{ + mawk_zfifo_t fifo; + + while(!(feof(stdin))) { + char line[1024], *cmd, *args; + *line = '\0'; + fgets(line, sizeof(line), stdin); + + cmd = line; + while(isspace(*cmd)) cmd++; + if ((*cmd == '#') || (*cmd == '\0')) + continue; + args = strpbrk(cmd, " \t\r\n"); + if (args != NULL) { + *args = '\0'; + args++; + } + + if (strcmp(cmd, "alloc") == 0) { + mawk_zfifo_alloc(MAWK, &fifo, (args == NULL ? 
-1 : atoi(args))); + printf(">alloc: %d\n", fifo.max_size); + } + else if (strcmp(cmd, "free") == 0) { + mawk_zfifo_free(MAWK, &fifo); + printf(">free\n"); + } + else if (strcmp(cmd, "write") == 0) { + int len; + assert(args != NULL); + len = strlen(args)-1; + while((len > 0) && ((args[len] == '\n') || (args[len] == '\r'))) { + args[len] = '\0'; + len--; + } + len++; + printf(">write: %d '%s'\n", mawk_zfifo_write(MAWK, &fifo, args, len), args); + } + else if (strcmp(cmd, "read") == 0) { + int size = sizeof(line)-1; + int ret; + if (args != NULL) + size = atoi(args); + *line = '\0'; + ret = mawk_zfifo_read(MAWK, &fifo, line, size); + if (ret >= 0) + line[ret] = '\0'; + else + line[0] = '\0'; + printf(">read: %d '%s'\n", ret, line); + } + else if (strcmp(cmd, "dump") == 0) { + mawk_zfifo_block_t *b; + printf(">dump\n"); + printf(" Size: %d/%d\n", fifo.size, fifo.max_size); + for(b = fifo.head; b != NULL; b = b->next) { + int n; + memcpy(line, b->buf, b->size); + line[b->size] = '\0'; + printf(" blk %03d from %03d '%s'\n", b->size, b->readp, line); + printf(" "); + for(n = b->readp; n > 0; n--) + printf(" "); + printf("^--readp\n"); + } + if (fifo.stage_used > 0) + memcpy(line, fifo.stage_buf, fifo.stage_used); + line[fifo.stage_used] = '\0'; + printf(" stg %d '%s'\n", fifo.stage_used, line); + } + else if (strcmp(cmd, "exit") == 0) { + break; + } + else { + fprintf(stderr, "Syntax error at invalid command '%s'\n", cmd); + } + } + + return 0; +} diff --git a/src/libmawk/regression/zfifo/zmalloc.h b/src/libmawk/regression/zfifo/zmalloc.h new file mode 100644 index 0000000..8ff3a5e --- /dev/null +++ b/src/libmawk/regression/zfifo/zmalloc.h @@ -0,0 +1,8 @@ +#ifndef ZMALLOC_H +#define ZMALLOC_H + +#include +#define mawk_zfree(MAWK, ptr, size) free(ptr) +#define mawk_zmalloc(MAWK, size) malloc(size) + +#endif diff --git a/src/libmawk/repl.h b/src/libmawk/repl.h new file mode 100644 index 0000000..9583305 --- /dev/null +++ b/src/libmawk/repl.h @@ -0,0 +1,31 @@ + 
+/******************************************** +repl.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#ifndef REPL_H +#define REPL_H + +PTR mawk_re_compile(mawk_state_t *, mawk_string_t *); +char *mawk_re_uncompile(mawk_state_t *, PTR); + + +mawk_cell_t *mawk_repl_compile(mawk_state_t *, mawk_string_t *); +char *mawk_repl_uncompile(mawk_state_t *, mawk_cell_t *); +int pmawk_repl_uncompile_bin(mawk_state_t *MAWK, mawk_cell_t *cp); +void mawk_repl_destroy(mawk_state_t *, mawk_cell_t *); +mawk_cell_t *mawk_replv_cpy(mawk_state_t *, mawk_cell_t *, const mawk_cell_t *); +mawk_cell_t *mawk_replv_to_repl(mawk_state_t *, mawk_cell_t *, mawk_string_t *); + +#endif diff --git a/src/libmawk/rexp/Makefile b/src/libmawk/rexp/Makefile new file mode 100644 index 0000000..4f28cc7 --- /dev/null +++ b/src/libmawk/rexp/Makefile @@ -0,0 +1,24 @@ + +#################################### +# This is a makefile for mawk, +# an implementation of AWK (1988). +#################################### +# +# + +CC = cc +CFLAGS = -g -Wall -O0 -I../.. -DSIZE_T_TYPES_H -fPIC + +O=rexp.o rexp0.o rexp1.o rexp2.o rexp3.o rexpdb.o +DB=rexpdb.o + +all : $(O) + @cat .done + +debug : $(O) $(DB) + @cat .done + +$(O) : rexp.h + +clean : + rm -f *.o .done diff --git a/src/libmawk/rexp/rexp.c b/src/libmawk/rexp/rexp.c new file mode 100644 index 0000000..9b8b517 --- /dev/null +++ b/src/libmawk/rexp/rexp.c @@ -0,0 +1,224 @@ + +/******************************************** +rexp.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. 
Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +/* op precedence parser for regular expressions */ + +#include "rexp.h" + + +/* DATA */ +const char *mawk_REerrlist[] = { (const char *) 0, + /* 1 */ "missing '('", + /* 2 */ "missing ')'", + /* 3 */ "bad class -- [], [^] or [", + /* 4 */ "missing operand", + /* 5 */ "resource exhaustion -- regular expression too large", + /* 6 */ "syntax error ^* or ^+" +}; + +/* E5 is very unlikely to occur */ + + +/* This table drives the operator precedence parser */ +static const short table[8][8] = { + +/* 0 | CAT * + ? ( ) */ +/* 0 */ { 0, L, L, L, L, L, L, E1}, +/* | */ { G, G, L, L, L, L, L, G}, +/* CAT*/{ G, G, G, L, L, L, L, G}, +/* * */ { G, G, G, G, G, G, E7, G}, +/* + */ { G, G, G, G, G, G, E7, G}, +/* ? */ { G, G, G, G, G, G, E7, G}, +/* ( */ { E2, L, L, L, L, L, L, EQ}, +/* ) */ { G, G, G, G, G, G, E7, G} +}; + + +#define STACKSZ 64 + + +PTR mawk_REcompile(mawk_state_t *MAWK, char *re) +{ + MACHINE m_stack[STACKSZ]; + struct op { + int token; + int prec; + } op_stack[STACKSZ]; + register MACHINE *m_ptr; + register struct op *op_ptr; + register int t; + int ern; + + /* do this first because it also checks if we have a + run time stack */ + t = mawk_RE_lex_init(MAWK, re); + if (t < 0) { + MAWK->REerrno = -t; + return NULL; + } + + if (*re == 0) { + mawk_RESTATE *p = (mawk_RESTATE *) mawk_RE_malloc(MAWK, sizeof(mawk_RESTATE)); + if (p == NULL) { + MAWK->REerrno = MEMORY_FAILURE; /* REerrno is stored positive (index into mawk_REerrlist), like the -t / -ern paths below */ + return NULL; + } + p->type = M_ACCEPT; + return (PTR) p; + } + + /* initialize the stacks */ + m_ptr = m_stack - 1; + op_ptr = op_stack; + op_ptr->token = 0; + + t = mawk_RE_lex(MAWK, m_stack); + if (t < 0) { + MAWK->REerrno = -t; + return NULL; + } + + while (1) { + switch (t) { + case T_STR: + case T_ANY: + case T_U: + case T_START: + case
T_END: + case T_CLASS: + m_ptr++; + break; + + case 0: /* end of reg expr */ + if (op_ptr->token == 0) { + /* done */ + if (m_ptr == m_stack) + return (PTR) m_ptr->start; + else { + /* machines still on the stack */ + mawk_RE_panic("values still on machine stack"); + } + } + + /* otherwise fall thru to default + which is operator case */ + + default: + + if ((op_ptr->prec = table[op_ptr->token][t]) == G) { + do { /* op_pop */ + + if (op_ptr->token <= T_CAT) /*binary op */ + m_ptr--; + /* if not enough values on machine stack + then we have a missing operand */ + if (m_ptr < m_stack) { + MAWK->REerrno = -E4; + return NULL; + } + + switch (op_ptr->token) { + case T_CAT: + ern = mawk_RE_cat(MAWK, m_ptr, m_ptr + 1); + if (ern < 0) { + MAWK->REerrno = -ern; + return NULL; + } + break; + + case T_OR: + ern = mawk_RE_or(MAWK, m_ptr, m_ptr + 1); + if (ern < 0) { + MAWK->REerrno = -ern; + return NULL; + } + break; + + case T_STAR: + ern = mawk_RE_close(MAWK, m_ptr); + if (ern < 0) { + MAWK->REerrno = -ern; + return NULL; + } + break; + + case T_PLUS: + ern = mawk_RE_poscl(MAWK, m_ptr); + if (ern < 0) { + MAWK->REerrno = -ern; + return NULL; + } + break; + + case T_Q: + ern = mawk_RE_01(MAWK, m_ptr); + if (ern < 0) { + MAWK->REerrno = -ern; + return NULL; + } + break; + + default: + /*nothing on ( or ) */ + break; + } + + op_ptr--; + } + while (op_ptr->prec != L); + + continue; /* back thru switch at top */ + } + + if (op_ptr->prec < 0) { + if (op_ptr->prec == E7) + mawk_RE_panic("parser returns E7"); + else { + MAWK->REerrno = -op_ptr->prec; + return NULL; + } + } + + if (++op_ptr == op_stack + STACKSZ) { + /* stack overflow */ + MAWK->REerrno = -E5; + return NULL; + } + + op_ptr->token = t; + } /* end of switch */ + + if (m_ptr == m_stack + (STACKSZ - 1)) { + /*overflow */ + MAWK->REerrno = -E5; + return NULL; + } + + t = mawk_RE_lex(MAWK, m_ptr + 1); + if (t < 0) { + MAWK->REerrno = -t; + return NULL; + } + } +} + + +/* getting here means a logic flaw or unforeseen case 
*/ +void mawk_RE_panic(char *s) +{ + fprintf(stderr, "mawk_REcompile() - panic: %s\n", s); + exit(100); +} diff --git a/src/libmawk/rexp/rexp.h b/src/libmawk/rexp/rexp.h new file mode 100644 index 0000000..a0982ce --- /dev/null +++ b/src/libmawk/rexp/rexp.h @@ -0,0 +1,112 @@ + +/******************************************** +rexp.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#ifndef REXP_H +#define REXP_H + +#include "libmawk/nstd.h" +#include "libmawk/mawk.h" +#include + +PTR mawk_RE_malloc(mawk_state_t *MAWK, unsigned); +PTR mawk_RE_realloc(mawk_state_t *MAWK, void *, unsigned, unsigned); + + +/* finite machine state types */ + +#define M_STR 0 +#define M_CLASS 1 +#define M_ANY 2 +#define M_START 3 +#define M_END 4 +#define M_U 5 +#define M_1J 6 +#define M_2JA 7 +#define M_2JB 8 +#define M_ACCEPT 9 +#define U_ON 10 + +#define U_OFF 0 +#define END_OFF 0 +#define END_ON (2*U_ON) + + +#define STATESZ (sizeof(mawk_RESTATE)) + +typedef struct { + mawk_RESTATE *start, *stop; + int size; /* how many bytes are allocated for start */ +} MACHINE; +#define is_invm(m) (((m).start == NULL) && ((m).stop == NULL)) + + +/* tokens */ +#define T_OR 1 /* | */ +#define T_CAT 2 +#define T_STAR 3 /* * */ +#define T_PLUS 4 /* + */ +#define T_Q 5 /* ? */ +#define T_LP 6 /* ( */ +#define T_RP 7 /* ) */ +#define T_START 8 /* ^ */ +#define T_END 9 /* $ */ +#define T_ANY 10 /* . 
*/ +#define T_CLASS 11 /* starts with [ */ +#define T_SLASH 12 /* \ */ +#define T_CHAR 13 /* all the rest */ +#define T_STR 14 +#define T_U 15 + +/* precedences and error codes */ +#define L 0 +#define EQ 1 +#define G 2 +#define E1 (-1) +#define E2 (-2) +#define E3 (-3) +#define E4 (-4) +#define E5 (-5) +#define E6 (-6) +#define E7 (-7) + +#define MEMORY_FAILURE 5 + +#define ison(b,x) ((b)[((unsigned char)(x))>>3] & (1<<((x)&7))) + +/* error trap */ +MACHINE mawk_RE_u(mawk_state_t *MAWK); +MACHINE mawk_RE_start(mawk_state_t *MAWK); +MACHINE mawk_RE_end(mawk_state_t *MAWK); +MACHINE mawk_RE_any(mawk_state_t *MAWK); +MACHINE mawk_RE_str(mawk_state_t *MAWK, char *, unsigned); +MACHINE mawk_RE_class(mawk_state_t *MAWK, mawk_BV *); +int mawk_RE_cat(mawk_state_t *MAWK, MACHINE *, MACHINE *); +int mawk_RE_or(mawk_state_t *MAWK, MACHINE *, MACHINE *); +int mawk_RE_close(mawk_state_t *MAWK, MACHINE *); +int mawk_RE_poscl(mawk_state_t *MAWK, MACHINE *); +int mawk_RE_01(mawk_state_t *MAWK, MACHINE *); +void mawk_RE_panic(char *); +char *mawk_str_str(char *, char *, unsigned); + +int mawk_RE_lex_init(mawk_state_t *MAWK, char *); +int mawk_RE_lex(mawk_state_t *MAWK, MACHINE *); +int mawk_RE_run_stack_init(mawk_state_t *MAWK); +mawk_RT_STATE *mawk_RE_new_run_stack(mawk_state_t *MAWK); + +void mawk_RE_free(mawk_state_t *MAWK, PTR p, unsigned sz); + + +#endif /* REXP_H */ diff --git a/src/libmawk/rexp/rexp0.c b/src/libmawk/rexp/rexp0.c new file mode 100644 index 0000000..8401303 --- /dev/null +++ b/src/libmawk/rexp/rexp0.c @@ -0,0 +1,573 @@ + +/******************************************** +rexp0.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/
+
+/* lexical scanner */
+
+#include "rexp.h"
+#include "libmawk/zmalloc.h"
+/* NOTE(review): the header name of this include was missing from this copy of
+   the patch; string.h is what the strlen/strchr/memset/memcmp calls below
+   need - confirm against upstream */
+#include <string.h>
+
+/* static functions */
+static int do_str(mawk_state_t *MAWK, int, char **, MACHINE *);
+static int do_class(mawk_state_t *MAWK, char **, MACHINE *);
+static int escape(char **);
+static mawk_BV *store_bvp(mawk_state_t *MAWK, mawk_BV *);
+static int ctohex(int);
+
+
+/* maps a character code in 0..'|' to its T_* token value (13 is T_CHAR) */
+#ifndef EG
+/* make next array visible */
+static
+#endif
+const char RE_char2token['|' + 1] = {
+	0, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 9, 13, 13, 13,
+	6, 7, 3, 4, 13, 13, 10, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+	13, 13, 5, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 11, 12, 13, 8, 13, 13, 13, 13, 13, 13,
+	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+	13, 13, 13, 13, 1
+};
+
+/* anything above '|' is an ordinary character; the argument is parenthesised
+   so that any expression can be passed safely */
+#define char2token(x) \
+( (unsigned char)(x) > '|' ? T_CHAR : RE_char2token[(int)(x)] )
+
+#define NOT_STARTED (-1)
+
+/* Point the scanner at re: remember the pattern, its size including the
+   terminator (RElen) and reset the previous-token state.
+   Returns the result of mawk_RE_run_stack_init(). */
+int mawk_RE_lex_init(mawk_state_t *MAWK, char *re)
+{
+	MAWK->RElp = re;
+	MAWK->RElen = strlen(re) + 1;
+	MAWK->REprev = NOT_STARTED;
+	return mawk_RE_run_stack_init(MAWK);
+}
+
+
+/* Return the next token of the pattern being scanned. For operand tokens the
+   machine recognizing the operand is stored in *mp. Returns 0 at the end of
+   the pattern and a negative value on error (-MEMORY_FAILURE when a machine
+   can not be allocated). An implicit T_CAT is returned, without advancing,
+   when an operand follows something it has to be concatenated to. */
+int mawk_RE_lex(mawk_state_t *MAWK, MACHINE *mp)
+{
+	register int c;
+
+/* reswitch: */
+	switch (c = char2token(*MAWK->RElp)) {
+	case T_PLUS:
+	case T_STAR:
+		if (MAWK->REprev == T_START)
+			return -6;
+		/* fall thru */
+
+	case T_OR:
+	case T_Q:
+	case T_RP:
+		MAWK->RElp++;
+		return MAWK->REprev = c;
+
+	case T_SLASH:
+		break;
+
+	case 0:
+		return 0;
+
+	case T_LP:
+		switch (MAWK->REprev) {
+		case T_CHAR:
+		case T_STR:
+		case T_ANY:
+		case T_CLASS:
+		case T_START:
+		case T_RP:
+		case T_PLUS:
+		case T_STAR:
+		case T_Q:
+		case T_U:
+			return MAWK->REprev = T_CAT;
+
+		default:
+			MAWK->RElp++;
+			return MAWK->REprev = T_LP;
+		}
+	}
+
+	/* *lp is an operand, but implicit cat op is possible */
+	switch (MAWK->REprev) {
+	case NOT_STARTED:
+	case T_OR:
+	case T_LP:
+	case T_CAT:
+
+		switch (c) {
+		case T_ANY:
+			{
+				/* NOTE(review): function-static, so this flag is shared by
+				   every mawk_state_t using the scanner, and it stays set if
+				   mawk_RE_any() fails below */
+				static int plus_is_star_flag = 0;
+
+				if (*++MAWK->RElp == '*') {
+					MAWK->RElp++;
+					*mp = mawk_RE_u(MAWK);
+					if (is_invm(*mp))
+						return -MEMORY_FAILURE;
+					return MAWK->REprev = T_U;
+				}
+				else if (*MAWK->RElp == '+') {
+					if (plus_is_star_flag) {
+						MAWK->RElp++;
+						*mp = mawk_RE_u(MAWK);
+						if (is_invm(*mp))
+							return -MEMORY_FAILURE;
+						plus_is_star_flag = 0;
+						return MAWK->REprev = T_U;
+					}
+					else {
+						/* ".+" is scanned as "." followed by ".*": emit the
+						   "." now and step back so the '.' is seen again */
+						plus_is_star_flag = 1;
+						MAWK->RElp--;
+						*mp = mawk_RE_any(MAWK);
+						if (is_invm(*mp))
+							return -MEMORY_FAILURE;
+						return MAWK->REprev = T_ANY;
+					}
+				}
+				else {
+					*mp = mawk_RE_any(MAWK);
+					if (is_invm(*mp))
+						return -MEMORY_FAILURE;
+					MAWK->REprev = T_ANY;
+				}
+			}
+			break;
+
+		case T_SLASH:
+			MAWK->RElp++;
+			c = escape(&MAWK->RElp);
+			MAWK->REprev = do_str(MAWK, c, &MAWK->RElp, mp);
+			if (MAWK->REprev < 0)
+				return MAWK->REprev;
+			break;
+
+		case T_CHAR:
+			c = *MAWK->RElp++;
+			MAWK->REprev = do_str(MAWK, c, &MAWK->RElp, mp);
+			if (MAWK->REprev < 0)
+				return MAWK->REprev;
+			break;
+
+		case T_CLASS:
+			MAWK->REprev = do_class(MAWK, &MAWK->RElp, mp);
+			if (MAWK->REprev < 0)
+				return MAWK->REprev;
+			break;
+
+		case T_START:
+			*mp = mawk_RE_start(MAWK);
+			if (is_invm(*mp))
+				return -MEMORY_FAILURE;
+			MAWK->RElp++;
+			MAWK->REprev = T_START;
+			break;
+
+		case T_END:
+			MAWK->RElp++;
+			*mp = mawk_RE_end(MAWK);
+			if (is_invm(*mp))
+				return -MEMORY_FAILURE;
+			return MAWK->REprev = T_END;
+
+		default:
+			mawk_RE_panic("bad switch in mawk_RE_lex");
+		}
+		break;
+
+	default:
+		/* don't advance the pointer */
+		return MAWK->REprev = T_CAT;
+	}
+
+	/* check for end character */
+	if (*MAWK->RElp == '$') {
+		mp->start->type += END_ON;
+		MAWK->RElp++;
+	}
+
+	return MAWK->REprev;
+}
+
+/*
+  Collect a run of characters into a string machine.
+  If the run ends at *,+, or ?, then don't take the last
+  character unless the string has length one.
+
+  Returns T_STR on success and -MEMORY_FAILURE if the buffer or the
+  machine can not be allocated.
+*/
+
+static int do_str(mawk_state_t *MAWK, int c, char **pp, MACHINE *mp)
+/* int c; the first character */
+/* char **pp; where to put the re_char pointer on exit */
+/* MACHINE *mp; where to put the string machine */
+{
+	register char *p; /* runs thru the input */
+	char *pt; /* trails p by one */
+	char *str; /* collect it here */
+	register char *s; /* runs thru the output */
+	unsigned len; /* length collected */
+	unsigned alloced;
+
+	p = *pp;
+	s = str = mawk_RE_malloc(MAWK, MAWK->RElen);
+	if (s == NULL)
+		return -MEMORY_FAILURE;
+	alloced = MAWK->RElen;
+	*s++ = c;
+	len = 1;
+
+	while (1) {
+		char *save;
+
+		switch (char2token(*p)) {
+		case T_CHAR:
+			pt = p;
+			*s++ = *p++;
+			break;
+
+		case T_SLASH:
+			pt = p;
+			save = p + 1; /* keep p in a register */
+			*s++ = escape(&save);
+			p = save;
+			break;
+
+		default:
+			goto out;
+		}
+		len++;
+	}
+
+out:
+	/* if len > 1 and we stopped on a ? + or * , need to back up */
+	if (len > 1 && (*p == '*' || *p == '+' || *p == '?')) {
+		len--;
+		p = pt;
+		s--;
+	}
+
+	*s = 0;
+	*pp = p;
+
+	/* trim the buffer to the collected length; a NULL result must not be
+	   handed to mawk_RE_str(), the matcher would dereference it later */
+	str = (char *) mawk_RE_realloc(MAWK, str, alloced, len + 1);
+	if (str == NULL)
+		return -MEMORY_FAILURE;
+
+	*mp = mawk_RE_str(MAWK, str, len);
+	if (is_invm(*mp)) {
+		/* no machine took ownership of the string */
+		mawk_RE_free(MAWK, str, len + 1);
+		return -MEMORY_FAILURE;
+	}
+	return T_STR;
+}
+
+
+/*--------------------------------------------
+  BUILD A CHARACTER CLASS
+ *---------------------------*/
+
+#define on( b, x) ((b)[(x)>>3] |= ( 1 << ((x)&7) ))
+
+/* turn on every bit from x to y (both inclusive) in bit vector b */
+static void block_on(mawk_BV b, int x, int y)
+	/* caller makes sure x<=y and x>0 y>0 */
+{
+	int lo = x >> 3;
+	int hi = y >> 3;
+	int r_lo = x & 7;
+	int r_hi = y & 7;
+
+	if (lo == hi) {
+		b[lo] |= (1 << (r_hi + 1)) - (1 << r_lo);
+	}
+	else {
+		int i;
+		for (i = lo + 1; i < hi; i++)
+			b[i] = 0xff;
+		b[lo] |= (0xff << r_lo);
+		b[hi] |= ~(0xff << (r_hi + 1));
+	}
+}
+
+/* build a mawk_BV for a character class.
+   *start points at the '['
+   on exit:   *start points at the character after ']'
+              mp points at a machine that recognizes the class
+   returns T_CLASS, E3 if there is no closing bracket, or
+   -MEMORY_FAILURE on allocation failure
+*/
+
+static int do_class(mawk_state_t *MAWK, char **start, MACHINE *mp)
+{
+	register char *p;
+	register mawk_BV *bvp;
+	mawk_BV *stored;
+	int prev;
+	char *q, *t;
+	int cnt;
+	int comp_flag;
+
+	p = t = (*start) + 1;
+
+	/* []...]  puts ] in a class
+	   [^]..]  negates a class with ]
+	 */
+	if (*p == ']')
+		p++;
+	else if (*p == '^' && *(p + 1) == ']')
+		p += 2;
+
+	while (1) { /* find the back of the class */
+		if (!(q = strchr(p, ']'))) {
+			/* no closing bracket */
+			return E3;
+		}
+		p = q - 1;
+		cnt = 0;
+		while (*p == '\\') {
+			cnt++;
+			p--;
+		}
+		if ((cnt & 1) == 0) {
+			/* even number of \ */
+			break;
+		}
+		p = q + 1;
+	}
+
+	/* q now pts at the back of the class */
+	p = t;
+	*start = q + 1;
+
+	bvp = (mawk_BV *) mawk_RE_malloc(MAWK, sizeof(mawk_BV));
+	if (bvp == NULL)
+		return -MEMORY_FAILURE;
+	memset(bvp, 0, sizeof(mawk_BV));
+
+	if (*p == '^') {
+		comp_flag = 1;
+		p++;
+	}
+	else
+		comp_flag = 0;
+
+	prev = -1; /* indicates - cannot be part of a range */
+
+	while (p < q) {
+		switch (*p) {
+		case '\\':
+
+			t = p + 1;
+			prev = escape(&t);
+			on(*bvp, prev);
+			p = t;
+			break;
+
+		case '-':
+
+			if (prev == -1 || p + 1 == q) {
+				prev = '-';
+				on(*bvp, '-');
+				p++;
+			}
+			else {
+				int c;
+				char *mark = ++p;
+
+				if (*p != '\\')
+					c = *(unsigned char *) p++;
+				else {
+					t = p + 1;
+					c = escape(&t);
+					p = t;
+				}
+
+				if (prev <= c) {
+					block_on(*bvp, prev, c);
+					prev = -1;
+				}
+				else { /* back up */
+
+					p = mark;
+					prev = '-';
+					on(*bvp, '-');
+				}
+			}
+			break;
+
+		default:
+			prev = *(unsigned char *) p++;
+			on(*bvp, prev);
+			break;
+		}
+	}
+
+	if (comp_flag) {
+		for (p = (char *) bvp; p < (char *) bvp + sizeof(mawk_BV); p++) {
+			*p = ~*p;
+		}
+	}
+
+	/* make sure zero is off */
+	(*bvp)[0] &= ~1;
+
+	stored = store_bvp(MAWK, bvp);
+	if (stored == NULL) {
+		/* store_bvp() returns NULL before recording bvp anywhere, so the
+		   vector is still ours to release; a NULL class pointer must never
+		   reach the machine */
+		mawk_RE_free(MAWK, bvp, sizeof(mawk_BV));
+		return -MEMORY_FAILURE;
+	}
+
+	*mp = mawk_RE_class(MAWK, stored);
+	if (is_invm(*mp))
+		return -MEMORY_FAILURE;
+	return T_CLASS;
+}
+
+
+/* storage for bit vectors so they can be reused ,
+   stored in an unsorted linear array
+   the array grows as needed
+
+   returns the stored vector equal to bvp (bvp itself if it is new; if an
+   equal one is already stored, bvp is freed), or NULL if the array can not
+   be grown, in which case bvp is left untouched
+*/
+
+#define BV_GROWTH 6
+
+static mawk_BV *store_bvp(mawk_state_t *MAWK, mawk_BV *bvp)
+{
+	register mawk_BV **p;
+	unsigned t;
+
+
+	if (MAWK->REbv_next == MAWK->REbv_limit) {
+		/* need to grow */
+		if (!MAWK->REbv_base) {
+			/* first growth */
+			t = 0;
+			MAWK->REbv_base = (mawk_BV **) mawk_RE_malloc(MAWK, BV_GROWTH * sizeof(mawk_BV *));
+			if (MAWK->REbv_base == NULL)
+				return NULL;
+			MAWK->REbv_alloced = BV_GROWTH * sizeof(mawk_BV *);
+		}
+		else {
+			t = MAWK->REbv_next - MAWK->REbv_base;
+			/* NOTE(review): on failure REbv_base is overwritten with NULL and
+			   REbv_alloced already holds the new size; whether the old array
+			   is leaked depends on what mawk_zrealloc() does on failure -
+			   confirm */
+			MAWK->REbv_base = (mawk_BV **) mawk_RE_realloc(MAWK, MAWK->REbv_base, MAWK->REbv_alloced, (t + BV_GROWTH) * sizeof(mawk_BV *));
+			MAWK->REbv_alloced = (t + BV_GROWTH) * sizeof(mawk_BV *);
+			if (MAWK->REbv_base == NULL)
+				return NULL;
+		}
+
+		MAWK->REbv_next = MAWK->REbv_base + t;
+		MAWK->REbv_limit = MAWK->REbv_next + BV_GROWTH;
+	}
+
+	/* put bvp in bv_next as a sentinel */
+	*MAWK->REbv_next = bvp;
+	p = MAWK->REbv_base;
+	while (memcmp(*p, bvp, sizeof(mawk_BV)))
+		p++;
+
+	if (p == MAWK->REbv_next) {
+		/* it is new */
+		MAWK->REbv_next++;
+	}
+	else {
+		/* we already have it */
+		mawk_RE_free(MAWK, bvp, sizeof(mawk_BV));
+	}
+
+	return *p;
+}
+
+
+/* ---------- convert escape sequences -------------*/
+
+#define isoctal(x) ((x)>='0'&&(x)<='7')
+
+#define NOT_HEX 16
+/* indexed by c - 'A'; 0 marks characters that are not hex digits */
+static const char hex_val['f' - 'A' + 1] = {
+	10, 11, 12, 13, 14, 15, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	10, 11, 12, 13, 14, 15
+};
+
+/* interpret 1 character as hex; returns 0..15, or NOT_HEX */
+static int ctohex(register int c)
+{
+	int t;
+
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	if (c >= 'A' && c <= 'f' && (t = hex_val[c - 'A']))
+		return t;
+	return NOT_HEX;
+}
+
+#define RE_ET_END 7
+
+
+
+/*-----------------
+  return the char
+  and move the pointer forward
+  on entry *s -> at the character after the slash
+
+  handles the single letter escapes of the table below, up to three octal
+  digits, \x followed by one or two hex digits; any other \c yields c and a
+  backslash at the end of the string yields the backslash itself
+ *-------------------*/
+
+static int escape(char **start_p)
+{
+	register char *p = *start_p;
+	register unsigned x;
+	unsigned xx;
+	int i;
+	struct {
+		char in, out;
+	} escape_test[RE_ET_END + 1] = {
+		{'n', '\n'},
+		{'t', '\t'},
+		{'f', '\f'},
+		{'b', '\b'},
+		{'r', '\r'},
+		{'a', '\07'},
+		{'v', '\013'},
+		{0, 0}
+	};
+
+
+	/* the last slot is a sentinel so that the search always terminates */
+	escape_test[RE_ET_END].in = *p;
+	i = 0;
+	while (escape_test[i].in != *p)
+		i++;
+	if (i != RE_ET_END) {
+		/* in escape_test table */
+		*start_p = p + 1;
+		return escape_test[i].out;
+	}
+
+	if (isoctal(*p)) {
+		x = *p++ - '0';
+		if (isoctal(*p)) {
+			x = (x << 3) + *p++ - '0';
+			if (isoctal(*p))
+				x = (x << 3) + *p++ - '0';
+		}
+		*start_p = p;
+		return x & 0xff;
+	}
+
+	if (*p == 0)
+		return '\\';
+
+	if (*p++ == 'x') {
+		if ((x = ctohex(*p)) == NOT_HEX) {
+			*start_p = p;
+			return 'x';
+		}
+
+		/* look for another hex digit */
+		if ((xx = ctohex(*++p)) != NOT_HEX) {
+			x = (x << 4) + xx;
+			p++;
+		}
+
+		*start_p = p;
+		return x;
+	}
+
+	/* anything else \c -> c */
+	*start_p = p;
+	return *(unsigned char *) (p - 1);
+}
diff --git a/src/libmawk/rexp/rexp1.c b/src/libmawk/rexp/rexp1.c
new file mode 100644
index 0000000..913dc64
--- /dev/null
+++ b/src/libmawk/rexp/rexp1.c
@@ -0,0 +1,241 @@
+
+/********************************************
+rexp1.c
+
+libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
+based on mawk code coming with the below copyright:
+
+copyright 1991, Michael D. Brennan
+
+This is a source file for mawk, an implementation of
+the AWK programming language.
+
+Mawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/
+
+/* re machine operations */
+
+#include "rexp.h"
+#include "libmawk/zmalloc.h"
+
+/* initialize a two state machine: a state of the requested type followed by
+   M_ACCEPT; returns 0, or -MEMORY_FAILURE (also recorded in MAWK->REerrno) */
+static int new_TWO(mawk_state_t *MAWK, int type, MACHINE *mp)
+{
+	mp->start = (mawk_RESTATE *) mawk_RE_malloc(MAWK, 2 * STATESZ);
+	if (mp->start == NULL) {
+		MAWK->REerrno = MEMORY_FAILURE;
+		return -MEMORY_FAILURE;
+	}
+	mp->size = 2 * STATESZ;
+	mp->stop = mp->start + 1;
+	mp->start->type = type;
+	mp->stop->type = M_ACCEPT;
+	return 0;
+}
+
+/* the invalid machine, returned by the constructors below on allocation
+   failure; callers detect it with is_invm() */
+static const MACHINE INVM = {NULL, NULL};
+
+/* build a machine that recognizes any */
+MACHINE mawk_RE_any(mawk_state_t *MAWK)
+{
+	MACHINE x;
+
+	if (new_TWO(MAWK, M_ANY, &x) < 0)
+		return INVM;
+	return x;
+}
+
+/* build a machine that recognizes the start of string */
+MACHINE mawk_RE_start(mawk_state_t *MAWK)
+{
+	MACHINE x;
+
+	if (new_TWO(MAWK, M_START, &x) < 0)
+		return INVM;
+	return x;
+}
+
+/* build a two state machine whose first state is M_END */
+MACHINE mawk_RE_end(mawk_state_t *MAWK)
+{
+	MACHINE x;
+
+	if (new_TWO(MAWK, M_END, &x) < 0)
+		return INVM;
+	return x;
+}
+
+/* build a machine that recognizes a class */
+MACHINE mawk_RE_class(mawk_state_t *MAWK, mawk_BV *bvp)
+{
+	MACHINE x;
+
+	if (new_TWO(MAWK, M_CLASS, &x) < 0)
+		return INVM;
+	x.start->data.bvp = bvp;
+	return x;
+}
+
+/* build a two state machine whose first state is M_U */
+MACHINE mawk_RE_u(mawk_state_t *MAWK)
+{
+	MACHINE x;
+
+	if (new_TWO(MAWK, M_U, &x) < 0)
+		return INVM;
+	return x;
+}
+
+/* build a machine that recognizes the string str of length len; the pointer
+   is stored in the machine, the string is not copied */
+MACHINE mawk_RE_str(mawk_state_t *MAWK, char *str, unsigned len)
+{
+	MACHINE x;
+
+	if (new_TWO(MAWK, M_STR, &x) < 0)
+		return INVM;
+	x.start->len = len;
+	x.start->data.str = str;
+	return x;
+}
+
+
+/* replace m and n by a machine that recognizes mn */
+int mawk_RE_cat(mawk_state_t *MAWK, MACHINE *mp, MACHINE *np)
+{
+	unsigned sz1, sz2, sz;
+
+	/* sz1 leaves out m's final M_ACCEPT, which n's states overwrite */
+	sz1 = mp->stop - mp->start;
+	sz2 = np->stop - np->start + 1;
+	sz = sz1 + sz2;
+
+	mp->start = (mawk_RESTATE *) mawk_RE_realloc(MAWK, mp->start, mp->size, sz * STATESZ);
+	if (mp->start == NULL)
+		return -MEMORY_FAILURE;
+	mp->size = sz * STATESZ;
+	mp->stop = mp->start + (sz - 1);
+	memcpy(mp->start + sz1, np->start, sz2 * STATESZ);
+	mawk_RE_free(MAWK, np->start, np->size);
+	return 0;
+}
+
+ /* replace m by a machine that recognizes m|n */
+
+int mawk_RE_or(mawk_state_t *MAWK, MACHINE *mp, MACHINE *np)
+{
+	register mawk_RESTATE *p;
+	unsigned szm, szn;
+
+	szm = mp->stop - mp->start + 1;
+	szn = np->stop - np->start + 1;
+
+	/* layout: M_2JA, states of m (its last state becomes M_1J), states of n,
+	   M_ACCEPT */
+	p = (mawk_RESTATE *) mawk_RE_malloc(MAWK, (szm + szn + 1) * STATESZ);
+	if (p == NULL)
+		return -MEMORY_FAILURE;
+	memcpy(p + 1, mp->start, szm * STATESZ);
+	mawk_RE_free(MAWK, mp->start, mp->size);
+	mp->start = p;
+	mp->size = (szm + szn + 1) * STATESZ;
+	(mp->stop = p + szm + szn)->type = M_ACCEPT;
+	p->type = M_2JA;
+	p->data.jump = szm + 1;
+	memcpy(p + szm + 1, np->start, szn * STATESZ);
+	mawk_RE_free(MAWK, np->start, np->size);
+	(p += szm)->type = M_1J;
+	p->data.jump = szn;
+	return 0;
+}
+
+/* UNARY OPERATIONS */
+
+/* replace m by m* */
+int mawk_RE_close(mawk_state_t *MAWK, MACHINE *mp)
+{
+	register mawk_RESTATE *p;
+	unsigned sz;
+
+	sz = mp->stop - mp->start + 1;
+	/* layout: M_2JA, states of m (its last state becomes M_2JB), M_ACCEPT */
+	p = (mawk_RESTATE *) mawk_RE_malloc(MAWK, (sz + 2) * STATESZ);
+	if (p == NULL)
+		return -MEMORY_FAILURE;
+	memcpy(p + 1, mp->start, sz * STATESZ);
+	mawk_RE_free(MAWK, mp->start, mp->size);
+	mp->start = p;
+	mp->size = (sz + 2) * STATESZ;
+	mp->stop = p + (sz + 1);
+	p->type = M_2JA;
+	p->data.jump = sz + 1;
+	(p += sz)->type = M_2JB;
+	p->data.jump = -(sz - 1);
+	(p + 1)->type = M_ACCEPT;
+	return 0;
+}
+
+/* replace m  by  m+  (positive closure) */
+int mawk_RE_poscl(mawk_state_t *MAWK, MACHINE *mp)
+{
+	register mawk_RESTATE *p;
+	unsigned sz;
+
+	sz = mp->stop - mp->start + 1;
+	/* NOTE(review): mp->start and mp->size are updated before the result of
+	   the realloc is checked */
+	mp->start = p = (mawk_RESTATE *) mawk_RE_realloc(MAWK, mp->start, mp->size, (sz + 1) * STATESZ);
+	mp->size = (sz + 1) * STATESZ;
+	if (mp->start == NULL)
+		return -MEMORY_FAILURE;
+	mp->stop = p + sz;
+	/* the old M_ACCEPT becomes an M_2JB jumping back to the first state */
+	p += --sz;
+	p->type = M_2JB;
+	p->data.jump = -sz;
+	(p + 1)->type = M_ACCEPT;
+	return 0;
+}
+
+/* replace m by m? (zero or one) */
+int mawk_RE_01(mawk_state_t *MAWK, MACHINE *mp)
+{
+	unsigned sz;
+	register mawk_RESTATE *p;
+
+	sz = mp->stop - mp->start + 1;
+	/* layout: M_2JB jumping to the final M_ACCEPT, then the states of m */
+	p = (mawk_RESTATE *) mawk_RE_malloc(MAWK, (sz + 1) * STATESZ);
+	if (p == NULL)
+		return -MEMORY_FAILURE;
+	memcpy(p + 1, mp->start, sz * STATESZ);
+	mawk_RE_free(MAWK, mp->start, mp->size);
+	mp->start = p;
+	mp->size = (sz + 1) * STATESZ;
+	mp->stop = p + sz;
+	p->type = M_2JB;
+	p->data.jump = sz;
+	return 0;
+}
+
+/*===================================
+MEMORY	ALLOCATION
+ *==============================*/
+
+
+/* allocate sz bytes with mawk_zmalloc(); logs the call to stderr when
+   MAWK_RE_MDEBUG is defined */
+PTR mawk_RE_malloc(mawk_state_t *MAWK, unsigned sz)
+{
+	PTR p;
+	p = mawk_zmalloc(MAWK, sz);
+#ifdef MAWK_RE_MDEBUG
+	fprintf(stderr, "RE malloc: -> [%p] %d\n", p, sz);
+#endif
+	return p;
+}
+
+/* resize p from oldsz to sz bytes with mawk_zrealloc(); logs the call to
+   stderr when MAWK_RE_MDEBUG is defined */
+PTR mawk_RE_realloc(mawk_state_t *MAWK, register PTR p, unsigned oldsz, unsigned sz)
+{
+	PTR n;
+	n = mawk_zrealloc(MAWK, p, oldsz, sz);
+#ifdef MAWK_RE_MDEBUG
+	fprintf(stderr, "RE realloc: [%p] %d -> [%p] %d\n", p, oldsz, n, sz);
+#endif
+	return n;
+}
+
+/* release the sz byte block p with mawk_zfree(); logs the call to stderr
+   when MAWK_RE_MDEBUG is defined */
+void mawk_RE_free(mawk_state_t *MAWK, PTR p, unsigned sz)
+{
+#ifdef MAWK_RE_MDEBUG
+	fprintf(stderr, "RE free: [%p] %d\n", p, sz);
+#endif
+	mawk_zfree(MAWK, p, sz);
+}
diff --git a/src/libmawk/rexp/rexp2.c b/src/libmawk/rexp/rexp2.c
new file mode 100644
index 0000000..465a203
--- /dev/null
+++ b/src/libmawk/rexp/rexp2.c
@@ -0,0 +1,352 @@
+
+/********************************************
+rexp2.c
+
+libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
+based on mawk code coming with the below copyright:
+
+copyright 1991, Michael D. Brennan
+
+This is a source file for mawk, an implementation of
+the AWK programming language.
+
+Mawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/
+/* test a string against a machine */
+
+#include "rexp.h"
+/* NOTE(review): the header name of this include was missing from this copy of
+   the patch; string.h is what the strlen/strcmp/strncmp/memcmp/strchr calls
+   below need - confirm against upstream */
+#include <string.h>
+
+#define STACKGROWTH 16
+
+#ifdef DEBUG
+static mawk_RT_STATE *slow_push(mawk_state_t *MAWK, mawk_RT_STATE *, mawk_RESTATE *, char *, int);
+#endif
+
+/* allocate the run time stack of the context if it does not exist yet;
+   returns 0 on success, -MEMORY_FAILURE if the allocation fails */
+int mawk_RE_run_stack_init(mawk_state_t *MAWK)
+{
+	if (!MAWK->RE_run_stack_base) {
+		MAWK->RE_run_stack_base = (mawk_RT_STATE *)
+			mawk_RE_malloc(MAWK, sizeof(mawk_RT_STATE) * STACKGROWTH);
+		if (MAWK->RE_run_stack_base == NULL)
+			return -MEMORY_FAILURE;
+		MAWK->RE_run_stack_limit = MAWK->RE_run_stack_base + STACKGROWTH;
+		MAWK->RE_run_stack_empty = MAWK->RE_run_stack_base - 1;
+	}
+	return 0;
+}
+
+/* sometimes during mawk_REmatch(), this stack can grow pretty large.
+   In real life cases, the back tracking usually fails. Some
+   work is needed here to improve the algorithm.
+   I.e., figure out how not to stack useless paths.
+*/
+
+/* grow the run time stack by STACKGROWTH entries and return the new stack
+   pointer (the first entry past the old ones); prints a message and calls
+   exit(100) when out of memory */
+mawk_RT_STATE *mawk_RE_new_run_stack(mawk_state_t *MAWK)
+{
+	int oldsize = MAWK->RE_run_stack_limit - MAWK->RE_run_stack_base;
+	int newsize = oldsize + STACKGROWTH;
+
+#ifdef LMDOS /* large model DOS */
+	/* have to worry about overflow on multiplication (ugh) */
+	if (newsize >= 4096)
+		MAWK->RE_run_stack_base = (mawk_RT_STATE *) 0;
+	else
+#endif
+
+		MAWK->RE_run_stack_base = (mawk_RT_STATE *) mawk_RE_realloc(MAWK, MAWK->RE_run_stack_base, oldsize * sizeof(mawk_RT_STATE), newsize * sizeof(mawk_RT_STATE));
+
+	if (!MAWK->RE_run_stack_base) {
+		fprintf(stderr, "out of memory for RE run time stack\n");
+		/* this is pretty unusual, I've only seen it happen on
+		   weird input to mawk_REmatch() under 16bit DOS , the same
+		   situation worked easily on 32bit machine.  */
+		exit(100);
+	}
+
+	MAWK->RE_run_stack_limit = MAWK->RE_run_stack_base + newsize;
+	MAWK->RE_run_stack_empty = MAWK->RE_run_stack_base - 1;
+
+	/* return the new stackp */
+	return MAWK->RE_run_stack_base + oldsize;
+}
+
+#ifdef DEBUG
+/* function version of push() for DEBUG builds: sp is the already incremented
+   stack pointer; grows the stack if needed and returns the entry written */
+static mawk_RT_STATE *slow_push(mawk_state_t *MAWK, mawk_RT_STATE *sp, mawk_RESTATE *m, char *s, int u)
+{
+	if (sp == MAWK->RE_run_stack_limit)
+		sp = mawk_RE_new_run_stack(MAWK);
+	sp->m = m;
+	sp->s = s;
+	sp->u = u;
+	return sp;
+}
+#endif
+
+/* push a (machine state, string position, u_flag) triple on the run stack;
+   the non-DEBUG form expands to several statements, so it must only be used
+   as a full statement, never as the body of an unbraced if/else */
+#ifdef DEBUG
+#define push(mx,sx,ux) stackp = slow_push(MAWK, ++stackp, mx, sx, ux)
+#else
+#define push(mx,sx,ux) if (++stackp == MAWK->RE_run_stack_limit)\
+	stackp = mawk_RE_new_run_stack(MAWK) ;\
+stackp->m=(mx);stackp->s=(sx);stackp->u=(ux)
+#endif
+
+
+#define CASE_UANY(x) case x + U_OFF : case x + U_ON
+
+/* test if str ~ /machine/
+   returns 1 on match, 0 on no match
+*/
+
+int mawk_REtest(mawk_state_t *MAWK, char *str, PTR machine)
+{
+	register mawk_RESTATE *m = (mawk_RESTATE *) machine;
+	register char *s = str;
+	register mawk_RT_STATE *stackp;
+	int u_flag;
+	char *str_end;
+	int t; /*convenient temps */
+	mawk_RESTATE *tm;
+
+	/* handle the easy case quickly */
+	if ((m + 1)->type == M_ACCEPT && m->type == M_STR)
+		return mawk_str_str(s, m->data.str, m->len) != (char *) 0;
+	else {
+		u_flag = U_ON;
+		str_end = (char *) 0;
+		stackp = MAWK->RE_run_stack_empty;
+		goto reswitch;
+	}
+
+refill:
+	/* the current path failed: resume the most recently stacked alternative */
+	if (stackp == MAWK->RE_run_stack_empty)
+		return 0;
+	m = stackp->m;
+	s = stackp->s;
+	u_flag = stackp--->u;
+
+
+reswitch:
+
+	switch (m->type + u_flag) {
+	case M_STR + U_OFF + END_OFF:
+		if (strncmp(s, m->data.str, m->len))
+			goto refill;
+		s += m->len;
+		m++;
+		goto reswitch;
+
+	case M_STR + U_OFF + END_ON:
+		if (strcmp(s, m->data.str))
+			goto refill;
+		s += m->len;
+		m++;
+		goto reswitch;
+
+	case M_STR + U_ON + END_OFF:
+		if (!(s = mawk_str_str(s, m->data.str, m->len)))
+			goto refill;
+		push(m, s + 1, U_ON);
+		s += m->len;
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_STR + U_ON + END_ON:
+		if (!str_end)
+			str_end = s + strlen(s);
+		t = (str_end - s) - m->len;
+		if (t < 0 || memcmp(s + t, m->data.str, m->len))
+			goto refill;
+		s = str_end;
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_CLASS + U_OFF + END_OFF:
+		if (!ison(*m->data.bvp, s[0]))
+			goto refill;
+		s++;
+		m++;
+		goto reswitch;
+
+	case M_CLASS + U_OFF + END_ON:
+		/* s[0] has to be checked first: when s is at the terminator, s[1]
+		   is already past the end of the string */
+		if (s[0] == 0 || s[1] != 0 || !ison(*m->data.bvp, s[0]))
+			goto refill;
+		s++;
+		m++;
+		goto reswitch;
+
+	case M_CLASS + U_ON + END_OFF:
+		while (!ison(*m->data.bvp, s[0])) {
+			if (s[0] == 0)
+				goto refill;
+			else
+				s++;
+		}
+		s++;
+		push(m, s, U_ON);
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_CLASS + U_ON + END_ON:
+		if (!str_end)
+			str_end = s + strlen(s);
+		if (s[0] == 0 || !ison(*m->data.bvp, str_end[-1]))
+			goto refill;
+		s = str_end;
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_ANY + U_OFF + END_OFF:
+		if (s[0] == 0)
+			goto refill;
+		s++;
+		m++;
+		goto reswitch;
+
+	case M_ANY + U_OFF + END_ON:
+		if (s[0] == 0 || s[1] != 0)
+			goto refill;
+		s++;
+		m++;
+		goto reswitch;
+
+	case M_ANY + U_ON + END_OFF:
+		if (s[0] == 0)
+			goto refill;
+		s++;
+		push(m, s, U_ON);
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_ANY + U_ON + END_ON:
+		if (s[0] == 0)
+			goto refill;
+		if (!str_end)
+			str_end = s + strlen(s);
+		s = str_end;
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_START + U_OFF + END_OFF:
+	case M_START + U_ON + END_OFF:
+		if (s != str)
+			goto refill;
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_START + U_OFF + END_ON:
+	case M_START + U_ON + END_ON:
+		if (s != str || s[0] != 0)
+			goto refill;
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_END + U_OFF:
+		if (s[0] != 0)
+			goto refill;
+		m++;
+		goto reswitch;
+
+	case M_END + U_ON:
+		s += strlen(s);
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	CASE_UANY(M_U):
+		u_flag = U_ON;
+		m++;
+		goto reswitch;
+
+	CASE_UANY(M_1J):
+		m += m->data.jump;
+		goto reswitch;
+
+	CASE_UANY(M_2JA): /* take the non jump branch */
+		/* don't stack an ACCEPT */
+		if ((tm = m + m->data.jump)->type == M_ACCEPT)
+			return 1;
+		push(tm, s, u_flag);
+		m++;
+		goto reswitch;
+
+	CASE_UANY(M_2JB): /* take the jump branch */
+		/* don't stack an ACCEPT */
+		if ((tm = m + 1)->type == M_ACCEPT)
+			return 1;
+		push(tm, s, u_flag);
+		m += m->data.jump;
+		goto reswitch;
+
+	CASE_UANY(M_ACCEPT):
+		return 1;
+
+	default:
+		mawk_RE_panic("unexpected case in mawk_REtest");
+	}
+	return -1;
+}
+
+
+
+#ifndef NOT_FOR_MAWK
+
+/* if the machine at p recognizes a plain string (M_STR followed by
+   M_ACCEPT), return that string and store its length in *lenp; otherwise
+   return NULL and leave *lenp alone */
+char *mawk_is_string_split(register mawk_RESTATE *p, unsigned *lenp)
+{
+	if (p[0].type == M_STR && p[1].type == M_ACCEPT) {
+		*lenp = p->len;
+		return p->data.str;
+	}
+	else
+		return (char *) 0;
+}
+#else /* mawk provides its own mawk_str_str */
+
+/* find the first occurrence of the klen byte long key in target; returns a
+   pointer to it, or NULL if it is not found or klen is 0 */
+char *mawk_str_str(register char *target, register char *key, unsigned klen)
+{
+	int c = key[0];
+
+	switch (klen) {
+	case 0:
+		return (char *) 0;
+
+	case 1:
+		return strchr(target, c);
+
+	case 2:
+		{
+			int c1 = key[1];
+
+			while (target = strchr(target, c)) {
+				if (target[1] == c1)
+					return target;
+				else
+					target++;
+			}
+			break;
+		}
+
+	default:
+		klen--;
+		key++;
+		while (target = strchr(target, c)) {
+			if (memcmp(target + 1, key, klen) == 0)
+				return target;
+			else
+				target++;
+		}
+		break;
+	}
+	return (char *) 0;
+}
+
+
+#endif /* NOT_FOR_MAWK */
diff --git a/src/libmawk/rexp/rexp3.c b/src/libmawk/rexp/rexp3.c
new file mode 100644
index 0000000..1d04019
--- /dev/null
+++ b/src/libmawk/rexp/rexp3.c
@@ -0,0 +1,373 @@
+
+/********************************************
+rexp3.c
+
+libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
+based on mawk code coming with the below copyright:
+
+copyright 1991, Michael D. Brennan
+
+This is a source file for mawk, an implementation of
+the AWK programming language.
+
+Mawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/
+/* match a string against a machine */
+
+/* NOTE(review): the header names of the next two includes were missing from
+   this copy of the patch; stdlib.h (abort) and string.h (strlen, strcmp,
+   strncmp, memcmp) are what the code below needs - confirm against upstream */
+#include <stdlib.h>
+#include <string.h>
+#include "rexp.h"
+
+
+mawk_RT_STATE *mawk_RE_new_run_stack(mawk_state_t *MAWK);
+
+
+/* push a (machine state, string position, match start, u_flag) entry on the
+   run stack, growing it when full; expands to several statements, so it must
+   only be used as a full statement, never as the body of an unbraced if/else */
+#define push(mx,sx,ssx,ux) if (++stackp == MAWK->RE_run_stack_limit)\
+	stackp = mawk_RE_new_run_stack(MAWK) ;\
+stackp->m=(mx);stackp->s=(sx);stackp->ss=(ssx);\
+stackp->u = (ux)
+
+
+#define CASE_UANY(x) case x + U_OFF : case x + U_ON
+
+/* returns start of first longest match and the length by
+   reference.  If no match returns NULL and length zero
+
+   when disable_match_at_start is non-zero, a ^ in the machine never
+   matches */
+
+char *mawk_REmatch(mawk_state_t *MAWK, char *str, PTR machine, unsigned *lenp, int disable_match_at_start)
+{
+	register mawk_RESTATE *m = (mawk_RESTATE *) machine;
+	register char *s = str;
+	char *ss;
+	register mawk_RT_STATE *stackp;
+	int u_flag, t;
+	char *str_end, *ts;
+
+	/* state of current best match stored here */
+	char *cb_ss; /* the start */
+	char *cb_e; /* the end , pts at first char not matched */
+
+	*lenp = 0;
+
+	/* check for the easy case */
+	if ((m + 1)->type == M_ACCEPT && m->type == M_STR) {
+		if ((ts = mawk_str_str(s, m->data.str, m->len)))
+			*lenp = m->len;
+		return ts;
+	}
+
+	u_flag = U_ON;
+	cb_ss = ss = str_end = (char *) 0;
+	stackp = MAWK->RE_run_stack_empty;
+	goto reswitch;
+
+refill:
+	/* the current path is finished: report the best match once the stack is
+	   empty, else resume the most recently stacked alternative */
+	if (stackp == MAWK->RE_run_stack_empty) {
+		if (cb_ss)
+			*lenp = cb_e - cb_ss;
+		return cb_ss;
+	}
+	ss = stackp->ss;
+	s = stackp--->s;
+	if (cb_ss) { /* does new state start too late ? */
+		if (ss) {
+			if (cb_ss < ss)
+				goto refill;
+		}
+		else if (cb_ss < s)
+			goto refill;
+	}
+
+	m = (stackp + 1)->m;
+	u_flag = (stackp + 1)->u;
+
+
+reswitch:
+
+	switch (m->type + u_flag) {
+	case M_STR + U_OFF + END_OFF:
+		if (strncmp(s, m->data.str, m->len))
+			goto refill;
+		if (!ss) {
+			if (cb_ss && s > cb_ss)
+				goto refill;
+			else
+				ss = s;
+		}
+		s += m->len;
+		m++;
+		goto reswitch;
+
+	case M_STR + U_OFF + END_ON:
+		if (strcmp(s, m->data.str))
+			goto refill;
+		if (!ss) {
+			if (cb_ss && s > cb_ss)
+				goto refill;
+			else
+				ss = s;
+		}
+		s += m->len;
+		m++;
+		goto reswitch;
+
+	case M_STR + U_ON + END_OFF:
+		if (!(s = mawk_str_str(s, m->data.str, m->len)))
+			goto refill;
+		push(m, s + 1, ss, U_ON);
+		if (!ss) {
+			if (cb_ss && s > cb_ss)
+				goto refill;
+			else
+				ss = s;
+		}
+		s += m->len;
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_STR + U_ON + END_ON:
+		if (!str_end)
+			str_end = s + strlen(s);
+		t = (str_end - s) - m->len;
+		if (t < 0 || memcmp(ts = s + t, m->data.str, m->len))
+			goto refill;
+		if (!ss) {
+			if (cb_ss && ts > cb_ss)
+				goto refill;
+			else
+				ss = ts;
+		}
+		s = str_end;
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_CLASS + U_OFF + END_OFF:
+		if (!ison(*m->data.bvp, s[0]))
+			goto refill;
+		if (!ss) {
+			if (cb_ss && s > cb_ss)
+				goto refill;
+			else
+				ss = s;
+		}
+		s++;
+		m++;
+		goto reswitch;
+
+	case M_CLASS + U_OFF + END_ON:
+		/* s[0] has to be checked first: when s is at the terminator, s[1]
+		   is already past the end of the string */
+		if (s[0] == 0 || s[1] != 0 || !ison(*m->data.bvp, s[0]))
+			goto refill;
+		if (!ss) {
+			if (cb_ss && s > cb_ss)
+				goto refill;
+			else
+				ss = s;
+		}
+		s++;
+		m++;
+		goto reswitch;
+
+	case M_CLASS + U_ON + END_OFF:
+		while (!ison(*m->data.bvp, s[0])) {
+			if (s[0] == 0)
+				goto refill;
+			else
+				s++;
+		}
+		s++;
+		push(m, s, ss, U_ON);
+		if (!ss) {
+			if (cb_ss && s - 1 > cb_ss)
+				goto refill;
+			else
+				ss = s - 1;
+		}
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_CLASS + U_ON + END_ON:
+		if (!str_end)
+			str_end = s + strlen(s);
+		if (s[0] == 0 || !ison(*m->data.bvp, str_end[-1]))
+			goto refill;
+		if (!ss) {
+			if (cb_ss && str_end - 1 > cb_ss)
+				goto refill;
+			else
+				ss = str_end - 1;
+		}
+		s = str_end;
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_ANY + U_OFF + END_OFF:
+		if (s[0] == 0)
+			goto refill;
+		if (!ss) {
+			if (cb_ss && s > cb_ss)
+				goto refill;
+			else
+				ss = s;
+		}
+		s++;
+		m++;
+		goto reswitch;
+
+	case M_ANY + U_OFF + END_ON:
+		if (s[0] == 0 || s[1] != 0)
+			goto refill;
+		if (!ss) {
+			if (cb_ss && s > cb_ss)
+				goto refill;
+			else
+				ss = s;
+		}
+		s++;
+		m++;
+		goto reswitch;
+
+	case M_ANY + U_ON + END_OFF:
+		if (s[0] == 0)
+			goto refill;
+		s++;
+		push(m, s, ss, U_ON);
+		if (!ss) {
+			if (cb_ss && s - 1 > cb_ss)
+				goto refill;
+			else
+				ss = s - 1;
+		}
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_ANY + U_ON + END_ON:
+		if (s[0] == 0)
+			goto refill;
+		if (!str_end)
+			str_end = s + strlen(s);
+		if (!ss) {
+			if (cb_ss && str_end - 1 > cb_ss)
+				goto refill;
+			else
+				ss = str_end - 1;
+		}
+		s = str_end;
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_START + U_OFF + END_OFF:
+	case M_START + U_ON + END_OFF:
+		if ((disable_match_at_start) || (s != str))
+			goto refill;
+		ss = s;
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_START + U_OFF + END_ON:
+	case M_START + U_ON + END_ON:
+		if (disable_match_at_start || s != str || s[0] != 0)
+			goto refill;
+		ss = s;
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	case M_END + U_OFF:
+		if (s[0] != 0)
+			goto refill;
+		if (!ss) {
+			if (cb_ss && s > cb_ss)
+				goto refill;
+			else
+				ss = s;
+		}
+		m++;
+		goto reswitch;
+
+	case M_END + U_ON:
+		s = str_end ? str_end : (str_end = s + strlen(s));
+		if (!ss) {
+			if (cb_ss && s > cb_ss)
+				goto refill;
+			else
+				ss = s;
+		}
+		m++;
+		u_flag = U_OFF;
+		goto reswitch;
+
+	CASE_UANY(M_U):
+		if (!ss) {
+			if (cb_ss && s > cb_ss)
+				goto refill;
+			else
+				ss = s;
+		}
+		u_flag = U_ON;
+		m++;
+		goto reswitch;
+
+	CASE_UANY(M_1J):
+		m += m->data.jump;
+		goto reswitch;
+
+	CASE_UANY(M_2JA): /* take the non jump branch */
+		push(m + m->data.jump, s, ss, u_flag);
+		m++;
+		goto reswitch;
+
+	CASE_UANY(M_2JB): /* take the jump branch */
+		push(m + 1, s, ss, u_flag);
+		m += m->data.jump;
+		goto reswitch;
+
+	case M_ACCEPT + U_OFF:
+		if (!ss)
+			ss = s;
+		if (!cb_ss || ss < cb_ss || (ss == cb_ss && s > cb_e)) {
+			/* we have a new current best */
+			cb_ss = ss;
+			cb_e = s;
+		}
+		else if (ss == cb_ss && s == cb_e) {
+			if (cb_ss)
+				*lenp = (unsigned) (cb_e - cb_ss);
+			return cb_ss;
+		}
+
+		goto refill;
+
+	case M_ACCEPT + U_ON:
+		if (!ss)
+			ss = s;
+		else
+			s = str_end ? str_end : (str_end = s + strlen(s));
+
+		if (!cb_ss || ss < cb_ss || (ss == cb_ss && s > cb_e)) {
+			/* we have a new current best */
+			cb_ss = ss;
+			cb_e = s;
+		}
+		else if (ss == cb_ss && s == cb_e) {
+			if (cb_ss)
+				*lenp = (unsigned) (cb_e - cb_ss);
+			return cb_ss;
+		}
+		goto refill;
+
+	default:
+		mawk_RE_panic("unexpected case in mawk_REmatch");
+	}
+
+	/* can't get here, but need return to suppress compiler warning; abort()
+	   is just for extra paranoia. */
+	abort();
+	return NULL;
+}
diff --git a/src/libmawk/rexp/rexpdb.c b/src/libmawk/rexp/rexpdb.c
new file mode 100644
index 0000000..1fd88d0
--- /dev/null
+++ b/src/libmawk/rexp/rexpdb.c
@@ -0,0 +1,79 @@
+
+/********************************************
+rexpdb.c
+
+libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
+based on mawk code coming with the below copyright:
+
+copyright 1991, Michael D. Brennan
+
+This is a source file for mawk, an implementation of
+the AWK programming language.
+
+Mawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/
+#include "rexp.h"
+/* NOTE(review): the header name of this include was missing from this copy of
+   the patch; stdio.h is what FILE and fprintf below need - confirm against
+   upstream */
+#include <stdio.h>
+
+/* print a machine for debugging */
+
+/* printable names of the state types, indexed by M_STR .. M_ACCEPT */
+static const char *xlat[] = {
+	"M_STR",
+	"M_CLASS",
+	"M_ANY",
+	"M_START",
+	"M_END",
+	"M_U",
+	"M_1J",
+	"M_2JA",
+	"M_2JB",
+	"M_ACCEPT"
+};
+
+#define XLAT_LEN ((int) (sizeof(xlat) / sizeof(xlat[0])))
+
+/* Print the states of machine m to f, one per line, up to and including the
+   first M_ACCEPT: the type name, then the string of an M_STR, the jump
+   offset of a jump state or the bytes of a class bit vector in hex, then "$"
+   if the state carries END_ON. Stops with a message at the first state whose
+   type has no name in xlat[]. The machine is left unmodified. */
+void mawk_REmprint(PTR m, FILE *f)
+{
+	register mawk_RESTATE *p = (mawk_RESTATE *) m;
+	char *end_on_string;
+
+	while (1) {
+		/* strip END_ON for the duration of the print; it is put back below */
+		if (p->type >= END_ON) {
+			p->type -= END_ON;
+			end_on_string = "$";
+		}
+		else
+			end_on_string = "";
+
+		/* xlat[] has fewer entries than END_ON, so the index has to be
+		   checked against the table itself */
+		if (p->type < 0 || p->type >= XLAT_LEN) {
+			fprintf(f, "unknown mawk_RESTATE type\n");
+			if (end_on_string[0])
+				p->type += END_ON;
+			return;
+		}
+
+		fprintf(f, "%-10s", xlat[(int)p->type]);
+		switch (p->type) {
+		case M_STR:
+			fprintf(f, "%s", p->data.str);
+			break;
+
+		case M_1J:
+		case M_2JA:
+		case M_2JB:
+			fprintf(f, "%d", p->data.jump);
+			break;
+		case M_CLASS:
+			{
+				unsigned char *q = (unsigned char *) p->data.bvp;
+				unsigned char *r = q + sizeof(mawk_BV);
+				while (q < r)
+					fprintf(f, "%x ", *q++);
+			}
+			break;
+		}
+		fprintf(f, "%s\n", end_on_string);
+		if (end_on_string[0])
+			p->type += END_ON;
+		if (p->type == M_ACCEPT)
+			return;
+		p++;
+	}
+}
diff --git a/src/libmawk/scan.c b/src/libmawk/scan.c
new file mode 100644
index 0000000..5487be7
--- /dev/null
+++ b/src/libmawk/scan.c
@@ -0,0 +1,1141 @@
+
+/********************************************
+scan.c
+
+libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas;
+based on mawk code coming with the below copyright:
+
+copyright 1991, Michael D. Brennan
+
+This is a source file for mawk, an implementation of
+the AWK programming language.
+
+Mawk is distributed without warranty under the terms of
+the GNU General Public License, version 2, 1991.
+********************************************/ + +/* help text editors to find out nesting without context */ +#define CL_BRACE '{' + +#define _POSIX_SOURCE +#define _BSD_SOURCE + +#include "mawk.h" +#include +#include +#include +#include "scan.h" +#include "memory.h" +#include "field.h" +#include "init.h" +#include "fin.h" +#include "repl.h" +#include "code.h" +#include "bi_vars.h" +#include "vio.h" +#include "da_bin_helper.h" + +#ifndef PATH_MAX +#define PATH_MAX 1024 +#endif + +#ifndef NO_FCNTL_H +#include +#endif + +#include "files.h" + + +/* static functions */ +static void scan_fillbuff(mawk_state_t * MAWK); +int mawk_scan_open(mawk_state_t * MAWK); +static int slow_next(mawk_state_t * MAWK); +static void eat_comment(mawk_state_t * MAWK); +static void eat_semi_colon(mawk_state_t * MAWK); +static mawk_num_t collect_decimal(mawk_state_t *, YYSTYPE *lvalp, int, int *); +static int collect_string(mawk_state_t *MAWK, YYSTYPE *lvalp); +static int collect_RE(mawk_state_t *MAWK, YYSTYPE *lvalp); + + + +/*----------------------------- + program file management + *----------------------------*/ + +/* return 1 if no more processing is needed (was a binary file) */ +int mawk_scan_init(mawk_state_t *MAWK, char *cmdline_program) +{ + if (cmdline_program) { + MAWK->ps.program_fin = NULL; /* command line program */ + MAWK->ps.program_string = mawk_new_STRING0(MAWK, strlen(cmdline_program) + 1); + strcpy(MAWK->ps.program_string->str, cmdline_program); + /* simulate file termination */ + MAWK->ps.program_string->str[MAWK->ps.program_string->len - 1] = '\n'; + MAWK->ps.buffp = (unsigned char *) MAWK->ps.program_string->str; + MAWK->ps.eof_flag = 1; + } + else { /* program from file[s] */ + + /* loading the script can not fail yet, as this is the first file */ + if (mawk_scan_open(MAWK) == 2) { + un_next(); + return 1; + } + + if (MAWK->ps.buffer == NULL) + MAWK->ps.buffer = (unsigned char *) mawk_zmalloc(MAWK, BUFFSZ + 1); + MAWK->ps.buffp = MAWK->ps.buffer; + if 
(MAWK->do_exit) + return -1; + scan_fillbuff(MAWK); + } + +#ifdef OS2 /* OS/2 "extproc" is similar to #! */ + if (strnicmp(MAWK->ps.buffp, "extproc ", 8) == 0) + eat_comment(MAWK); +#endif + mawk_eat_nl(MAWK, NULL); /* scan to first token */ + if (next(MAWK) == 0) { + /* no program */ + mawk_exitval(MAWK, 0, -1); + } + + un_next(); + return 0; +} + +/* open MAWK->pfile_name[0]; returns 1 on success, 0 if the file could not +be open (error is also set) or is duplicate and ignored (normal condition) */ +int mawk_scan_open(mawk_state_t * MAWK) +{ /* open pfile_name */ + if (MAWK->ps.pfile_name[0] == '-' && MAWK->ps.pfile_name[1] == 0) { + MAWK->ps.program_fin = mawk_file_find_(MAWK, "/dev/stdin", F_IN, 1); + } + else { + const char *fn; + char *start, *end, *osp; + int len, nlen, uniq; + char bc; + char path[PATH_MAX]; +#ifdef mawk_realpath + char rpath[PATH_MAX]; +#else +# define rpath path +#endif + mawk_cell_t idx; + + fn = MAWK->ps.pfile_name; + bc = MAWK->ps.pfile_bytecode; + if (*fn == '+') { + fn++; + uniq = 1; + } + else + uniq = 0; + + nlen = strlen(fn); + osp = ((mawk_string_t *) LIBPATH->ptr)->str; + if ((osp == NULL) || (*osp == '\0') || (*fn == '/')) + osp = ""; + + for (end = start = osp; end != NULL; start = end + 1) { + end = strchr(start, ';'); + if (end == NULL) + len = strlen(start); + else + len = end - start; + + if (len > 0) { + memcpy(path, start, len); + path[len] = '/'; + len++; + path[len] = '\0'; + } + else + path[0] = '\0'; + + if (len + nlen > sizeof(path)) { + mawk_errmsg(MAWK, errno, "cannot load awk script - path too long ('%s' '%s' vs %d bytes)", path, MAWK->ps.pfile_name, + sizeof(path)); + mawk_exitval(MAWK, 2, -1); + } + memcpy(path + len, fn, nlen); + path[len + nlen] = '\0'; + +#ifdef mawk_realpath + if (mawk_realpath(path, rpath) == NULL) { + mawk_errmsg(MAWK, errno, "cannot convert to realpath ('%s')", path); + mawk_exitval(MAWK, 2, -1); + } +#endif + + idx.type = C_STRING; + idx.ptr = mawk_new_STRING(MAWK, rpath); + if (uniq) { 
+ /* we assume if an entry is in the array, the file exists, + as we already could load it once and there is no reason to assume + it has disappeared meanwhile - well, in theory this is a race + condition as another process may have deleted the file, but then + we would fail with "not found" anyway. */ + if (mawk_array_find(MAWK, MAWK->scripts_loaded, &idx, NULL, 0) != 0) { + free_STRING((mawk_string_t *)idx.ptr); + return 0; + } + } + + MAWK->binary_loaded = 0; + if (bc) { + if (mawk_load_code_bin(MAWK, path) != 0) { + mawk_errmsg(MAWK, 0, "failed to load or link binary script %s", path); + mawk_exitval(MAWK, 2, -1); + return 0; + } + MAWK->binary_loaded = 1; + free_STRING((mawk_string_t *)idx.ptr); + return 2; + } + else if ((MAWK->ps.program_fin = mawk_file_find_(MAWK, path, F_IN, 1)) != NULL) { + mawk_cell_t one; + one.type = C_NUM; + one.d.dval = MAWK_NUM_ONE; + mawk_array_set(MAWK, MAWK->scripts_loaded, &idx, &one); + free_STRING((mawk_string_t *)idx.ptr); + return 1; + } + free_STRING((mawk_string_t *)idx.ptr); + } + + mawk_errmsg(MAWK, errno, "cannot open script %s (used search path %s)", fn, osp); + mawk_exitval(MAWK, 2, -1); + } + return 0; +} + +void mawk_scan_cleanup(mawk_state_t * MAWK) +{ + if (MAWK->ps.program_fin != NULL) { + mawk_zfree(MAWK, MAWK->ps.buffer, BUFFSZ + 1); + MAWK->ps.buffer = NULL; + MAWK->ps.buffp = NULL; + } + else if (MAWK->ps.program_string != NULL) { + free_STRING(MAWK->ps.program_string); + } + + if (MAWK->ps.program_fin != NULL) { + mawk_file_close_(MAWK, MAWK->ps.program_fin); + MAWK->ps.program_fin = NULL; + } + + /* redefine SPACE as [ \t\n] */ + + MAWK->scan_code['\n'] = MAWK->posix_space_flag && MAWK->rs_shadow.type != SEP_MLR ? 
SC_UNEXPECTED : SC_SPACE; + MAWK->scan_code['\f'] = SC_UNEXPECTED; /*value doesn't matter */ + MAWK->scan_code['\013'] = SC_UNEXPECTED; /* \v not space */ + MAWK->scan_code['\r'] = SC_UNEXPECTED; +} + +/*---------------------------------------- + file reading functions + next() and un_next(c) are macros in scan.h + + *---------------------*/ + +static void scan_fillbuff(mawk_state_t * MAWK) +{ + unsigned r = 0; + + if (MAWK->ps.program_fin != NULL) + r = mawk_fillbuff(MAWK, MAWK->ps.program_fin->fin, (char *) MAWK->ps.buffer, BUFFSZ, 0); + if (r < BUFFSZ) { + MAWK->ps.eof_flag = 1; + /* make sure eof is terminated */ + MAWK->ps.buffer[r] = '\n'; + MAWK->ps.buffer[r + 1] = 0; + } +} + +/* read one character -- slowly */ +static int slow_next(mawk_state_t * MAWK) +{ + + while (*MAWK->ps.buffp == 0) { + + if (!MAWK->ps.eof_flag) { + MAWK->ps.buffp = MAWK->ps.buffer; + scan_fillbuff(MAWK); + } + else { + PFILE *q; + + if (MAWK->ps.program_fin != NULL) { + mawk_file_close_(MAWK, MAWK->ps.program_fin); + MAWK->ps.program_fin = NULL; + } + + if (mawk_parser_pop(MAWK) == 0) { + MAWK->ps.eof_flag = 0; + do { + if (MAWK->pfile_list != NULL) { + MAWK->ps.pfile_name = MAWK->pfile_list->fname; + MAWK->ps.pfile_bytecode = MAWK->pfile_list->bytecode; + q = MAWK->pfile_list; + MAWK->pfile_list = MAWK->pfile_list->link; + MAWK_ZFREE(MAWK, q); + } + else { + MAWK->ps.eof_flag = 1; + goto real_eof; + } + } while (mawk_scan_open(MAWK) != 1); + MAWK->token_lineno = MAWK->lineno = 1; + } + } + } + +real_eof:; + return *MAWK->ps.buffp++; /* note can un_next() , eof which is zero */ +} + +static void eat_comment(mawk_state_t * MAWK) +{ + register int c; + + while ((c = next(MAWK)) != '\n' && MAWK->scan_code[c]); + un_next(); +} + +/* this is how we handle extra semi-colons that are + now allowed to separate pattern-action blocks + + A proof that they are useless clutter to the language: + we throw them away +*/ + +static void eat_semi_colon(mawk_state_t * MAWK) +/* eat one semi-colon on 
the current line */ +{ + register int c; + + while (MAWK->scan_code[c = next(MAWK)] == SC_SPACE); + if (c != ';') + un_next(); +} + +void mawk_eat_nl(mawk_state_t * MAWK, YYSTYPE *lvalp) +{ /* eat all space including newlines */ + while (1) + switch (MAWK->scan_code[next(MAWK)]) { + case SC_COMMENT: + eat_comment(MAWK); + break; + + case SC_NL: + MAWK->lineno++; + /* fall thru */ + + case SC_SPACE: + break; + + case SC_ESCAPE: + /* bug fix - surprised anyone did this, + a csh user with backslash dyslexia.(Not a joke) + */ + { + unsigned c; + + while (MAWK->scan_code[c = next(MAWK)] == SC_SPACE); + if (c == '\n') + MAWK->token_lineno = ++MAWK->lineno; + else if (c == 0) { + un_next(); + return; + } + else { /* error */ + + un_next(); + /* can't un_next() twice so deal with it */ + lvalp->ival = '\\'; + mawk_unexpected_char(MAWK, lvalp); + if (++(MAWK->compile_error_count) == MAX_COMPILE_ERRORS) + mawk_exit(MAWK, 2); + return; + } + } + break; + + default: + un_next(); + return; + } +} + +int Mawk_lex(YYSTYPE *lvalp, mawk_state_t * MAWK) +{ + register int c; + + if (MAWK->do_exit) + return -1; + + MAWK->token_lineno = MAWK->lineno; + MAWK->lvalp = lvalp; + +reswitch: + + switch (MAWK->scan_code[c = next(MAWK)]) { + case 0: + ct_ret(EOF); + + case SC_SPACE: + goto reswitch; + + case SC_COMMENT: + eat_comment(MAWK); + goto reswitch; + + case SC_NL: + MAWK->lineno++; + mawk_eat_nl(MAWK, lvalp); + ct_ret(NL); + + case SC_ESCAPE: + while (MAWK->scan_code[c = next(MAWK)] == SC_SPACE); + if (c == '\n') { + MAWK->token_lineno = ++MAWK->lineno; + goto reswitch; + } + + if (c == 0) + ct_ret(EOF); + un_next(); + lvalp->ival = '\\'; + ct_ret(UNEXPECTED); + + + case SC_SEMI_COLON: + mawk_eat_nl(MAWK, lvalp); + ct_ret(SEMI_COLON); + + case SC_LBRACE: + mawk_eat_nl(MAWK, lvalp); + MAWK->brace_cnt++; + ct_ret(LBRACE); + + case SC_PLUS: + switch (next(MAWK)) { + case '+': + lvalp->ival = '+'; + string_buff[0] = string_buff[1] = '+'; + string_buff[2] = 0; + ct_ret(INC_or_DEC); + + 
case '=': + ct_ret(ADD_ASG); + + default: + un_next(); + ct_ret(PLUS); + } + + case SC_MINUS: + switch (next(MAWK)) { + case '-': + lvalp->ival = '-'; + string_buff[0] = string_buff[1] = '-'; + string_buff[2] = 0; + ct_ret(INC_or_DEC); + + case '=': + ct_ret(SUB_ASG); + + default: + un_next(); + ct_ret(MINUS); + } + + case SC_COMMA: + mawk_eat_nl(MAWK, lvalp); + ct_ret(COMMA); + + case SC_MUL: + mawk_test1_ret('=', MUL_ASG, MUL); + + case SC_DIV: + { + static const int can_precede_div[] = { DOUBLE, STRING_, RPAREN, ID, D_ID, RE, RBOX, FIELD, + GETLINE, INC_or_DEC, -1 + }; + + const int *p = can_precede_div; + + do { + if (*p == MAWK->current_token) { + if (*p != INC_or_DEC) { + mawk_test1_ret('=', DIV_ASG, DIV); + } + + if (next(MAWK) == '=') { + un_next(); + ct_ret(collect_RE(MAWK, lvalp)); + } + } + } + while (*++p != -1); + + ct_ret(collect_RE(MAWK, lvalp)); + } + + case SC_MOD: + mawk_test1_ret('=', MOD_ASG, MOD); + + case SC_POW: + mawk_test1_ret('=', POW_ASG, POW); + + case SC_LPAREN: + MAWK->paren_cnt++; + ct_ret(LPAREN); + + case SC_RPAREN: + if (--MAWK->paren_cnt < 0) { + mawk_compile_error(MAWK, "extra ')'"); + MAWK->paren_cnt = 0; + goto reswitch; + } + + ct_ret(RPAREN); + + case SC_LBOX: + ct_ret(LBOX); + + case SC_RBOX: + ct_ret(RBOX); + + case SC_MATCH: + /* MATCH has no fixed text in token_str[], so Mawk_error() prints string_buff: store "~" and terminate it after the character, not on top of it */ + string_buff[0] = '~'; + string_buff[1] = 0; + lvalp->ival = 1; + ct_ret(MATCH); + + case SC_EQUAL: + mawk_test1_ret('=', EQ, ASSIGN); + + case SC_NOT: /* ! 
*/ + if ((c = next(MAWK)) == '~') { + string_buff[0] = '!'; + string_buff[1] = '~'; + string_buff[2] = 0; + lvalp->ival = 0; + ct_ret(MATCH); + } + else if (c == '=') + ct_ret(NEQ); + + un_next(); + ct_ret(NOT); + + + case SC_LT: /* '<' */ + if (next(MAWK) == '=') + ct_ret(LTE); + else + un_next(); + + if (MAWK->getline_flag) { + MAWK->getline_flag = 0; + ct_ret(IO_IN); + } + else + ct_ret(LT); + + case SC_GT: /* '>' */ + if (MAWK->print_flag && MAWK->paren_cnt == 0) { + MAWK->print_flag = 0; + /* there are 3 types of IO_OUT + -- build the error string in string_buff */ + string_buff[0] = '>'; + if (next(MAWK) == '>') { + lvalp->ival = F_APPEND; + string_buff[1] = '>'; + string_buff[2] = 0; + } + else { + un_next(); + lvalp->ival = F_TRUNC; + string_buff[1] = 0; + } + return MAWK->current_token = IO_OUT; + } + + mawk_test1_ret('=', GTE, GT); + + case SC_OR: + if (next(MAWK) == '|') { + mawk_eat_nl(MAWK, lvalp); + ct_ret(OR); + } + else { + un_next(); + + if (MAWK->print_flag && MAWK->paren_cnt == 0) { + MAWK->print_flag = 0; + lvalp->ival = PIPE_OUT; + string_buff[0] = '|'; + string_buff[1] = 0; + ct_ret(IO_OUT); + } + else + ct_ret(PIPE); + } + + case SC_AND: + if (next(MAWK) == '&') { + mawk_eat_nl(MAWK, lvalp); + ct_ret(AND); + } + else { + un_next(); + lvalp->ival = '&'; + ct_ret(UNEXPECTED); + } + + case SC_QMARK: + ct_ret(QMARK); + + case SC_COLON: + ct_ret(COLON); + + case SC_RBRACE: + if (--MAWK->brace_cnt < 0) { + mawk_compile_error(MAWK, "extra '}'"); + eat_semi_colon(MAWK); + MAWK->brace_cnt = 0; + goto reswitch; + } + + if ((c = MAWK->current_token) == NL || c == SEMI_COLON || c == SC_FAKE_SEMI_COLON || c == RBRACE) { + /* if the brace_cnt is zero , we've completed + a pattern action block. If the user insists + on adding a semi-colon on the same line + we will eat it. 
Note what we do below: + physical law -- conservation of semi-colons */ + + if (MAWK->brace_cnt == 0) + eat_semi_colon(MAWK); + mawk_eat_nl(MAWK, lvalp); + ct_ret(RBRACE); + } + + /* supply missing semi-colon to statement that + precedes a '}' */ + MAWK->brace_cnt++; + un_next(); + MAWK->current_token = SC_FAKE_SEMI_COLON; + return SEMI_COLON; + + case SC_DIGIT: + case SC_DOT: + { + mawk_num_t d; + int flag; + static const mawk_num_t double_zero = MAWK_NUM_ZERO; + static const mawk_num_t double_one = MAWK_NUM_ONE; + + if ((d = collect_decimal(MAWK, lvalp, c, &flag)) == MAWK_NUM_ZERO) { + if (flag) + ct_ret(flag); + else + lvalp->ptr = (PTR) & double_zero; + } + else if (d == 1.0) { + lvalp->ptr = (PTR) & double_one; + } + else { + lvalp->ptr = (PTR) MAWK_ZMALLOC(MAWK, mawk_num_t); + *(mawk_num_t *) lvalp->ptr = d; + } + ct_ret(DOUBLE); + } + + case SC_DOLLAR: /* '$' */ + { + mawk_num_t d; + int flag; + + while (MAWK->scan_code[c = next(MAWK)] == SC_SPACE); + if (MAWK->scan_code[c] != SC_DIGIT && MAWK->scan_code[c] != SC_DOT) { + un_next(); + ct_ret(DOLLAR); + } + + /* compute field address at compile time */ + if ((d = collect_decimal(MAWK, lvalp, c, &flag)) == 0.0) { + if (flag) + ct_ret(flag); /* an error */ + else + lvalp->cp = &MAWK->field[0]; + } + else { + if (d > MAX_FIELD) { + mawk_compile_error(MAWK, "$%g exceeds maximum field(%d)", d, MAX_FIELD); + d = MAX_FIELD; + } + lvalp->cp = field_ptr((int) d); + } + + ct_ret(FIELD); + } + + case SC_DQUOTE: + return MAWK->current_token = collect_string(MAWK, lvalp); + + case SC_IDCHAR: /* collect an identifier */ + { + unsigned char *p = (unsigned char *) string_buff + 1; + SYMTAB *stp; + + string_buff[0] = c; + + while ((c = MAWK->scan_code[*p++ = next(MAWK)]) == SC_IDCHAR || c == SC_DIGIT); + + un_next(); + *--p = 0; + + switch ((stp = mawk_find(MAWK, string_buff, 1))->type) { + case ST_NONE: + /* check for function call before defined */ + if (next(MAWK) == '(') { + stp->type = ST_FUNCT; + stp->stval.fbp = 
(FBLOCK *) + mawk_zmalloc(MAWK, sizeof(FBLOCK)); + stp->stval.fbp->name = stp->name; + stp->stval.fbp->code = (INST *) 0; + lvalp->fbp = stp->stval.fbp; + MAWK->current_token = FUNCT_ID; + } + else { + lvalp->stp = stp; + MAWK->current_token = MAWK->current_token == DOLLAR ? D_ID : ID; + } + un_next(); + break; + + case ST_NR: + MAWK->NR_flag = 1; + stp->type = ST_VAR; + /* fall thru */ + + case ST_VAR: + case ST_ARRAY: + case ST_LOCAL_NONE: + case ST_LOCAL_VAR: + case ST_LOCAL_ARRAY: + + lvalp->stp = stp; + MAWK->current_token = MAWK->current_token == DOLLAR ? D_ID : ID; + break; + + case ST_FUNCT: + lvalp->fbp = stp->stval.fbp; + MAWK->current_token = FUNCT_ID; + break; + + case ST_C_FUNCTION: + lvalp->fbp = calloc(sizeof(FBLOCK), 1); + lvalp->fbp->name = mawk_strdup_(string_buff); + lvalp->fbp->code = NULL; + lvalp->fbp->c_next = MAWK->c_funcs; + MAWK->c_funcs = lvalp->fbp; + MAWK->current_token = C_FUNCT_ID; + break; + + + case ST_KEYWORD: + MAWK->current_token = stp->stval.kw; + break; + + case ST_BUILTIN: + lvalp->bip = stp->stval.bip; + MAWK->current_token = BUILTIN; + break; + + case ST_LENGTH: + + lvalp->bip = stp->stval.bip; + + /* check for length alone, this is an ugly + hack */ + while (MAWK->scan_code[c = next(MAWK)] == SC_SPACE); + un_next(); + + MAWK->current_token = c == '(' ? BUILTIN : LENGTH; + break; + + case ST_FIELD: + lvalp->cp = stp->stval.cp; + MAWK->current_token = FIELD; + break; + + + default: + fprintf(stderr, "stp type:%d\n", stp->type); + mawk_bozo(MAWK, "mawk_find returned bad st type"); + } + return MAWK->current_token; + } + + + case SC_UNEXPECTED: + lvalp->ival = c & 0xff; + ct_ret(UNEXPECTED); + } + return 0; /* never get here make lint happy */ +} + +/* collect a decimal constant in temp_buff. 
+ Return the value and error conditions by reference */ + +static mawk_num_t collect_decimal(mawk_state_t *MAWK, YYSTYPE *lvalp, int c, int *flag) +{ + register unsigned char *p = (unsigned char *) string_buff + 1; + unsigned char *endp; + mawk_num_t d; + + *flag = 0; + string_buff[0] = c; + + if (c == '.') { + if (MAWK->scan_code[*p++ = next(MAWK)] != SC_DIGIT) { + *flag = UNEXPECTED; + lvalp->ival = '.'; + return MAWK_NUM_ZERO; + } + } + else { + while (MAWK->scan_code[*p++ = next(MAWK)] == SC_DIGIT); + if (p[-1] != '.') { + un_next(); + p--; + } + } + /* get rest of digits after decimal point */ + while (MAWK->scan_code[*p++ = next(MAWK)] == SC_DIGIT); + + /* check for exponent */ + if (p[-1] != 'e' && p[-1] != 'E') { + un_next(); + *--p = 0; + } + else { /* get the exponent */ + + if (MAWK->scan_code[*p = next(MAWK)] != SC_DIGIT && *p != '-' && *p != '+') { + *++p = 0; + *flag = BAD_DECIMAL; + return MAWK_NUM_ZERO; + } + else { /* get the rest of the exponent */ + + p++; + while (MAWK->scan_code[*p++ = next(MAWK)] == SC_DIGIT); + un_next(); + *--p = 0; + } + } + + errno = 0; /* check for mawk_overflow/underflow */ + d = strtonum(string_buff, (char **) &endp); + +#ifndef STRTOD_UNDERFLOW_ON_ZERO_BUG + if (errno) + mawk_compile_error(MAWK, "%s : decimal %sflow", string_buff, d == 0.0 ? "under" : "over"); +#else /* ! 
sun4 bug */ + if (errno && d != 0.0) + mawk_compile_error(MAWK, "%s : decimal mawk_overflow", string_buff); +#endif + + if (endp < p) { + *flag = BAD_DECIMAL; + return 0.0; + } + return d; +} + +/*---------- process escape characters ---------------*/ +static int collect_string(mawk_state_t * MAWK, YYSTYPE *lvalp) +{ + register unsigned char *p = (unsigned char *) string_buff; + int c; + int e_flag = 0; /* on if have an escape char */ + + while (1) + switch (MAWK->scan_code[*p++ = next(MAWK)]) { + case SC_DQUOTE: /* done */ + *--p = 0; + goto out; + + case SC_NL: + p[-1] = 0; + /* fall thru */ + + case 0: /* unterminated string */ + mawk_compile_error(MAWK, "runaway string constant \"%.10s ...", string_buff, MAWK->token_lineno); + mawk_exitval(MAWK, 2, -1); + + case SC_ESCAPE: + if ((c = next(MAWK)) == '\n') { + p--; + MAWK->lineno++; + } + else if (c == 0) + un_next(); + else { + *p++ = c; + e_flag = 1; + } + + break; + + default: + break; + } + +out: + lvalp->ptr = (PTR) mawk_new_STRING(MAWK, e_flag ? 
mawk_rm_escape(MAWK, string_buff) + : string_buff); + return STRING_; +} + + +static int collect_RE(mawk_state_t * MAWK, YYSTYPE *lvalp) +{ + register unsigned char *p = (unsigned char *) string_buff; + int c; + mawk_string_t *sval; + + while (1) + switch (MAWK->scan_code[*p++ = next(MAWK)]) { + case SC_DIV: /* done */ + *--p = 0; + goto out; + + case SC_NL: + p[-1] = 0; + /* fall thru */ + + case 0: /* unterminated re */ + mawk_compile_error(MAWK, "runaway regular expression /%.10s ...", string_buff, MAWK->token_lineno); + mawk_exitval(MAWK, 2, -1); + + case SC_ESCAPE: + switch (c = next(MAWK)) { + case '/': + p[-1] = '/'; + break; + + case '\n': + p--; + break; + + case 0: + un_next(); + break; + + default: + *p++ = c; + break; + } + break; + } + +out: + /* now we've got the RE, so compile it */ + sval = mawk_new_STRING(MAWK, string_buff); + lvalp->ptr = mawk_re_compile(MAWK, sval); + free_STRING(sval); + return RE; +} + +void mawk_parser_push(mawk_state_t * MAWK) +{ + if (MAWK->pstack_used >= MAWK->pstack_alloced) { + MAWK->pstack_alloced += 8; + MAWK->mawk_parser_stack = realloc(MAWK->mawk_parser_stack, sizeof(mawk_parse_state_t) * MAWK->pstack_alloced); + } + memcpy(&MAWK->mawk_parser_stack[MAWK->pstack_used], &MAWK->ps, sizeof(mawk_parse_state_t)); + MAWK->pstack_used++; + memset(&MAWK->ps, 0, sizeof(mawk_parse_state_t)); +} + +int mawk_parser_pop(mawk_state_t * MAWK) +{ + if (MAWK->pstack_used <= 0) + return 0; + + mawk_zfree(MAWK, MAWK->ps.buffer, BUFFSZ + 1); + MAWK->ps.buffer = NULL; + MAWK->ps.buffp = NULL; + + MAWK->pstack_used--; + memcpy(&MAWK->ps, &MAWK->mawk_parser_stack[MAWK->pstack_used], sizeof(mawk_parse_state_t)); + + if (MAWK->pstack_used == 0) { + free(MAWK->mawk_parser_stack); + MAWK->mawk_parser_stack = NULL; + MAWK->pstack_alloced = 0; + } + + return 1; +} + +/* error handling */ + +void mawk_unexpected_char(mawk_state_t * MAWK, YYSTYPE *lvalp) +{ + int c = lvalp->ival; + + fprintf(stderr, "%s: %u: ", MAWK->progname, MAWK->token_lineno); + 
if (c > ' ' && c < 127) + fprintf(stderr, "unexpected character '%c'\n", c); + else + fprintf(stderr, "unexpected character 0x%02x\n", c); +} + + +static void missing(mawk_state_t *, int, const char *, int); + +static const struct token_str { + short token; + char *str; +} token_str[] = { /* read-only */ + {EOF, "end of file"}, + {NL, "end of line"}, + {SEMI_COLON, ";"}, + {LBRACE, "{"}, + {RBRACE, "}"}, + {SC_FAKE_SEMI_COLON, "}"}, + {LPAREN, "("}, + {RPAREN, ")"}, + {LBOX, "["}, + {RBOX, "]"}, + {QMARK, "?"}, + {COLON, ":"}, + {OR, "||"}, + {AND, "&&"}, + {ASSIGN, "="}, + {ADD_ASG, "+="}, + {SUB_ASG, "-="}, + {MUL_ASG, "*="}, + {DIV_ASG, "/="}, + {MOD_ASG, "%="}, + {POW_ASG, "^="}, + {EQ, "=="}, + {NEQ, "!="}, + {LT, "<"}, + {LTE, "<="}, + {GT, ">"}, + {GTE, ">="}, + {MATCH, NULL}, + {PLUS, "+"}, + {MINUS, "-"}, + {MUL, "*"}, + {DIV, "/"}, + {MOD, "%"}, + {POW, "^"}, + {NOT, "!"}, + {COMMA, ","}, + {INC_or_DEC, NULL}, + {DOUBLE, NULL}, + {STRING_, NULL}, + {ID, NULL}, + {FUNCT_ID, NULL}, + {BUILTIN, NULL}, + {IO_OUT, NULL}, + {IO_IN, "<"}, + {PIPE, "|"}, + {DOLLAR, "$"}, + {FIELD, "$"}, + {0, NULL} +}; + +/* if paren_cnt >0 and we see one of these, we are missing a ')' */ +static const int missing_rparen[] = /* read-only */ +{ EOF, NL, SEMI_COLON, SC_FAKE_SEMI_COLON, RBRACE, 0 }; + +/* ditto for '}' */ +static const int missing_rbrace[] = /* read-only */ +{ EOF, BEGIN, END, 0 }; + +void Mawk_error(mawk_state_t *MAWK, char *s_unused) +{ + const struct token_str *p; + const int *ip; + const char *s; + + s = (char *) 0; + + for (p = token_str; p->token; p++) + if (MAWK->current_token == p->token) { + s = (p->str == NULL) ? 
string_buff : p->str; + break; + } + + if (!s) /* search the keywords */ + s = mawk_find_kw_str(MAWK->current_token); + + if (s) { + if (MAWK->paren_cnt) + for (ip = missing_rparen; *ip; ip++) + if (*ip == MAWK->current_token) { + missing(MAWK, ')', s, MAWK->token_lineno); + MAWK->paren_cnt = 0; + goto done; + } + + if (MAWK->brace_cnt) + for (ip = missing_rbrace; *ip; ip++) + if (*ip == MAWK->current_token) { + missing(MAWK, CL_BRACE, s, MAWK->token_lineno); + MAWK->brace_cnt = 0; + goto done; + } + + mawk_compile_error(MAWK, "syntax error at or near %s", s); + + } + else /* special cases */ + switch (MAWK->current_token) { + case UNEXPECTED: + mawk_unexpected_char(MAWK, (YYSTYPE *)MAWK->lvalp); + goto done; + + case BAD_DECIMAL: + mawk_compile_error(MAWK, "syntax error in decimal constant %s", string_buff); + break; + + case RE: + mawk_compile_error(MAWK, "syntax error at or near /%s/", string_buff); + break; + + default: + mawk_compile_error(MAWK, "syntax error"); + break; + } + return; + +done: + if (++(MAWK->compile_error_count) == MAX_COMPILE_ERRORS) + mawk_exit(MAWK, 2); +} + +static void missing(mawk_state_t *MAWK, int c, const char *n, int ln) +{ + const char *s0, *s1; + + if (MAWK->ps.pfile_name) { + s0 = MAWK->ps.pfile_name; + s1 = ": "; + } + else + s0 = s1 = ""; + + mawk_errmsg(MAWK, 0, "%s%sline %u: missing %c near %s", s0, s1, ln, c, n); +} diff --git a/src/libmawk/scan.h b/src/libmawk/scan.h new file mode 100644 index 0000000..b5dbf3e --- /dev/null +++ b/src/libmawk/scan.h @@ -0,0 +1,98 @@ + +/******************************************** +scan.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +#ifndef SCAN_H_INCLUDED +#define SCAN_H_INCLUDED 1 + + +#ifndef MAKESCAN +#include +#include + +void mawk_parser_push(mawk_state_t * MAWK); +int mawk_parser_pop(mawk_state_t * MAWK); +int mawk_scan_open(mawk_state_t * MAWK); + +void mawk_parse(mawk_state_t *); +int Mawk_lex(YYSTYPE *lvalp, mawk_state_t *); +int Mawk_parse(mawk_state_t *); +void Mawk_error(mawk_state_t *MAWK, char *s_unused); +void mawk_scan_cleanup(mawk_state_t *); +void mawk_unexpected_char(mawk_state_t * MAWK, YYSTYPE *lvalp); +#endif + + +extern const char mawk_scan_code[256]; /* read-only */ + +/* the scan codes to compactify the main switch */ + +#define SC_SPACE 1 +#define SC_NL 2 +#define SC_SEMI_COLON 3 +#define SC_FAKE_SEMI_COLON 4 +#define SC_LBRACE 5 +#define SC_RBRACE 6 +#define SC_QMARK 7 +#define SC_COLON 8 +#define SC_OR 9 +#define SC_AND 10 +#define SC_PLUS 11 +#define SC_MINUS 12 +#define SC_MUL 13 +#define SC_DIV 14 +#define SC_MOD 15 +#define SC_POW 16 +#define SC_LPAREN 17 +#define SC_RPAREN 18 +#define SC_LBOX 19 +#define SC_RBOX 20 +#define SC_IDCHAR 21 +#define SC_DIGIT 22 +#define SC_DQUOTE 23 +#define SC_ESCAPE 24 +#define SC_COMMENT 25 +#define SC_EQUAL 26 +#define SC_NOT 27 +#define SC_LT 28 +#define SC_GT 29 +#define SC_COMMA 30 +#define SC_DOT 31 +#define SC_MATCH 32 +#define SC_DOLLAR 33 +#define SC_UNEXPECTED 34 +#define SC_INCLUDE 35 + +#ifndef MAKESCAN + +void mawk_eat_nl(mawk_state_t * MAWK, YYSTYPE *lvalp); + +#define ct_ret(x) return MAWK->current_token = (x) + +#define next(MAWK) (*MAWK->ps.buffp ? *MAWK->ps.buffp++ : slow_next(MAWK)) +#define un_next() MAWK->ps.buffp-- + +#define mawk_test1_ret(c,x,d) if ( next(MAWK) == (c) ) ct_ret(x) ;\ + else { un_next() ; ct_ret(d) ; } + +#define mawk_test2_ret(c1,x1,c2,x2,d) switch( next(MAWK) )\ + { case c1: ct_ret(x1) ;\ + case c2: ct_ret(x2) ;\ + default: un_next() ;\ + ct_ret(d) ; } +#endif /* ! 
MAKESCAN */ + + +#endif diff --git a/src/libmawk/scancode.c b/src/libmawk/scancode.c new file mode 100644 index 0000000..55f0ec1 --- /dev/null +++ b/src/libmawk/scancode.c @@ -0,0 +1,23 @@ + + +/* scancode.c */ + + +const char mawk_scan_code[256] = { + 0,34,34,34,34,34,34,34,34, 1, 2, 1, 1, 1,34,34, +34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, + 1,27,23,25,33,15,10,34,17,18,13,11,30,12,31,14, +22,22,22,22,22,22,22,22,22,22, 8, 3,28,26,29, 7, +34,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21, +21,21,21,21,21,21,21,21,21,21,21,19,24,20,16,21, +34,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21, +21,21,21,21,21,21,21,21,21,21,21, 5, 9, 6,32,34, +34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, +34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, +34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, +34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, +34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, +34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, +34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, +34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34 +} ; diff --git a/src/libmawk/sizes.h b/src/libmawk/sizes.h new file mode 100644 index 0000000..726c47e --- /dev/null +++ b/src/libmawk/sizes.h @@ -0,0 +1,61 @@ + +/******************************************** +sizes.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, 1992. Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +#ifndef SIZES_H +#define SIZES_H + +#ifndef MAX__INT +#include +#define MAX__INT INT_MAX +#define MAX__LONG LONG_MAX +#endif /* MAX__INT */ + +#if MAX__INT <= 0x7fff +#define SHORT_INTS +#define INT_FMT "%ld" +typedef long Int; +#define Max_Int MAX__LONG +#else +#define INT_FMT "%d" +typedef int Int; +#define Max_Int MAX__INT +#endif + +#define EVAL_STACK_SIZE 256 /* initial size , can grow */ +/* number of fields at startup, must be a power of 2 + and FBANK_SZ-1 must be divisible by 3! */ +#define FBANK_SZ 256 +#define FB_SHIFT 8 /* lg(FBANK_SZ) */ +#define NUM_FBANK 128 /* see MAX_FIELD below */ + + +#define MAX_SPLIT (FBANK_SZ-1) /* needs to be divisible by 3: the split loops in split.c run MAX_SPLIT / 3 passes, three fields per pass */ +#define MAX_FIELD (NUM_FBANK*FBANK_SZ - 1) + +#define MIN_SPRINTF 400 + + +#define BUFFSZ 4096 + /* starting buffer size for input files, grows if + necessary */ + +#define HASH_PRIME 53 +#define A_HASH_PRIME 199 + + +#define MAX_COMPILE_ERRORS 5 /* quit if more than 4 errors */ + +#endif /* SIZES_H */ diff --git a/src/libmawk/split.c b/src/libmawk/split.c new file mode 100644 index 0000000..537f228 --- /dev/null +++ b/src/libmawk/split.c @@ -0,0 +1,291 @@ + +/******************************************** +split.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +/* For all splitting up to MAX_SPLIT fields go into + split_buff[], the rest go onto split_ov_list ( split + mawk_overflow list) + + We can split one of three ways: + (1) By space: + mawk_space_split() and space_ov_split() + (2) By regular expression: + mawk_re_split() and re_ov_split() + (3) By "" (null -- split into characters) + mawk_null_split() and null_ov_split() +*/ + +#define TEMPBUFF_GOES_HERE + +#include "mawk.h" +#include "symtype.h" +#include "bi_vars.h" +#include "bi_funct.h" +#include "memory.h" +#include "scan.h" +#include "regexp.h" +#include "field.h" + +static int re_ov_split(mawk_state_t *, char *, PTR); +static int space_ov_split(mawk_state_t *, char *, char *); +static int null_ov_split(mawk_state_t *, char *); + +/* split string s of length slen on SPACE without changing s. + load the pieces into STRINGS and ptrs into + split_buff[] + return the number of pieces */ + +int mawk_space_split(mawk_state_t *MAWK, register char *s, unsigned slen) +{ + char *back = s + slen; + int i = 0; + int len; + char *q; + mawk_string_t *sval; + int lcnt = MAX_SPLIT / 3; + +#define EAT_SPACE() while ( MAWK->scan_code[*(unsigned char*)s] ==\ + SC_SPACE ) s++ +#define EAT_NON_SPACE() \ + *back = ' ' ; /* sentinel */\ + while ( MAWK->scan_code[*(unsigned char*)s] != SC_SPACE ) s++ ;\ + *back = 0 + + + while (lcnt--) { + EAT_SPACE(); + if (*s == 0) + goto done; + /* mark the front with q */ + q = s++; + EAT_NON_SPACE(); + sval = split_buff[i++] = mawk_new_STRING0(MAWK, len = s - q); + memcpy(sval->str, q, len); + + EAT_SPACE(); + if (*s == 0) + goto done; + q = s++; + EAT_NON_SPACE(); + sval = split_buff[i++] = mawk_new_STRING0(MAWK, len = s - q); + memcpy(sval->str, q, len); + + EAT_SPACE(); + if (*s == 0) + goto done; + q = s++; + EAT_NON_SPACE(); + sval = split_buff[i++] = mawk_new_STRING0(MAWK, len = s - q); + memcpy(sval->str, q, len); + + } + /* we've mawk_overflowed */ + return i + space_ov_split(MAWK, s, 
back); + +done: + return i; +} + +static int space_ov_split(mawk_state_t *MAWK, register char *s, char *back) +{ + SPLIT_OV dummy; + register SPLIT_OV *tail = &dummy; + char *q; + int cnt = 0; + unsigned len; + + while (1) { + EAT_SPACE(); + if (*s == 0) + break; /* done */ + q = s++; + EAT_NON_SPACE(); + + tail = tail->link = MAWK_ZMALLOC(MAWK, SPLIT_OV); + tail->sval = mawk_new_STRING0(MAWK, len = s - q); + memcpy(tail->sval->str, q, len); + cnt++; + } + + tail->link = (SPLIT_OV *) 0; + MAWK->split_ov_list = dummy.link; + return cnt; +} + +/* match a string with a regular expression, but + only matches of positive length count */ +char *mawk_re_pos_match(mawk_state_t *MAWK, register char *s, PTR re, unsigned *lenp) +{ + while ((s = mawk_REmatch(MAWK, s, re, lenp, 0))) + if (*lenp) + return s; + else if (*s == 0) + break; + else + s++; + + return (char *) 0; +} + +int mawk_re_split(mawk_state_t *MAWK, char *s, PTR re) +{ + register char *t; + int i = 0; + unsigned mlen, len; + mawk_string_t *sval; + int lcnt = MAX_SPLIT / 3; + + while (lcnt--) { + if (!(t = mawk_re_pos_match(MAWK, s, re, &mlen))) + goto done; + sval = split_buff[i++] = mawk_new_STRING0(MAWK, len = t - s); + memcpy(sval->str, s, len); + s = t + mlen; + + if (!(t = mawk_re_pos_match(MAWK, s, re, &mlen))) + goto done; + sval = split_buff[i++] = mawk_new_STRING0(MAWK, len = t - s); + memcpy(sval->str, s, len); + s = t + mlen; + + if (!(t = mawk_re_pos_match(MAWK, s, re, &mlen))) + goto done; + sval = split_buff[i++] = mawk_new_STRING0(MAWK, len = t - s); + memcpy(sval->str, s, len); + s = t + mlen; + } + /* we've mawk_overflowed */ + return i + re_ov_split(MAWK, s, re); + +done: + split_buff[i++] = mawk_new_STRING(MAWK, s); + return i; +} + +/* + we've mawk_overflowed split_buff[] , put + the rest on the split_ov_list + return number of pieces +*/ + +static int re_ov_split(mawk_state_t *MAWK, char *s, PTR re) +{ + SPLIT_OV dummy; + register SPLIT_OV *tail = &dummy; + int cnt = 1; + char *t; + unsigned 
len, mlen; + + while ((t = mawk_re_pos_match(MAWK, s, re, &mlen))) { + tail = tail->link = MAWK_ZMALLOC(MAWK, SPLIT_OV); + tail->sval = mawk_new_STRING0(MAWK, len = t - s); + memcpy(tail->sval->str, s, len); + s = t + mlen; + cnt++; + } + /* and one more */ + tail = tail->link = MAWK_ZMALLOC(MAWK, SPLIT_OV); + tail->sval = mawk_new_STRING(MAWK, s); + tail->link = (SPLIT_OV *) 0; + MAWK->split_ov_list = dummy.link; + + return cnt; +} + + +int mawk_null_split(mawk_state_t *MAWK, char *s) +{ + int cnt = 0; /* number of fields split */ + mawk_string_t *sval; + int i = 0; /* indexes split_buff[] */ + + while (*s) { + if (cnt == MAX_SPLIT) + return cnt + null_ov_split(MAWK, s); + + sval = mawk_new_STRING0(MAWK, 1); + sval->str[0] = *s++; + split_buff[i++] = sval; + cnt++; + } + return cnt; +} + +static int null_ov_split(mawk_state_t *MAWK, char *s) +{ + SPLIT_OV dummy; + SPLIT_OV *ovp = &dummy; + int cnt = 0; + + while (*s) { + ovp = ovp->link = MAWK_ZMALLOC(MAWK, SPLIT_OV); + ovp->sval = mawk_new_STRING0(MAWK, 1); + ovp->sval->str[0] = *s++; + cnt++; + } + ovp->link = (SPLIT_OV *) 0; + MAWK->split_ov_list = dummy.link; + return cnt; +} + + +/* split(s, X, r) + split s into array X on r + + entry: sp[0] holds r + sp[-1] pts at X + sp[-2] holds s +*/ +mawk_cell_t *mawk_bi_split(mawk_state_t *MAWK, register mawk_cell_t *sp) +{ + int cnt; /* the number of pieces */ + + + if (sp->type < C_RE) + mawk_cast_for_split(MAWK, sp); + /* can be C_RE, C_SPACE or C_SNULL */ + sp -= 2; + if (sp->type < C_STRING) + mawk_cast1_to_str(MAWK, sp); + + if (string(sp)->len == 0) /* nothing to split */ + cnt = 0; + else + switch ((sp + 2)->type) { + case C_RE: + cnt = mawk_re_split(MAWK, string(sp)->str, (sp + 2)->ptr); + break; + + case C_SPACE: + cnt = mawk_space_split(MAWK, string(sp)->str, string(sp)->len); + break; + + case C_SNULL: /* split on empty string */ + cnt = mawk_null_split(MAWK, string(sp)->str); + break; + + default: + mawk_bozo(MAWK, "bad splitting cell in bi_split"); + } + + 
+ free_STRING(string(sp)); + sp->type = C_NUM; + sp->d.dval = (mawk_num_t) cnt; + + mawk_array_load(MAWK, (mawk_array_t) (sp + 1)->ptr, cnt); + + return sp; +} diff --git a/src/libmawk/split.h b/src/libmawk/split.h new file mode 100644 index 0000000..74bad80 --- /dev/null +++ b/src/libmawk/split.h @@ -0,0 +1,58 @@ +/* Walk the split buffer (first the list above MAX_SPLIT then the + array below MAX_SPLIT, unordered walk) and run action_macro + for each item. Action_macro is pasted twice: once for the list, + then for the array. + + cnt is the number of fields. + + If free_list is non-zero, the ov list items are freed after + action_macro. Not deleting the list is useful if multiple runs + are required. + + Calling convention for the action macro is: + action(idx, sval) + where idx is an integer index numbered from 0 and sval is a mawk_string_t pointer. +*/ +#define mawk_split_walk(MAWK, cnt, free_list, action_macro) \ +do { \ + int spwlk__cnt = cnt; \ + mawk_split_walk_top(MAWK, spwlk__cnt, free_list, action_macro); \ + mawk_split_walk_bottom(MAWK, spwlk__cnt, action_macro); \ +} while(0) + + +/* walk items above MAX_SPLIT from a linked list + sets cnt to MAX_SPLIT if it was larger than MAX_SPLIT; + NOTE: this is half of the job mawk_split_walk does */ +#define mawk_split_walk_top(MAWK, cnt, free_list, action_macro) \ +do { \ + if (cnt > MAX_SPLIT) { \ + SPLIT_OV *spwlk__p = MAWK->split_ov_list; \ + SPLIT_OV *spwlk__q; \ + int spwlk__i; \ + MAWK->split_ov_list = (SPLIT_OV *) 0; \ + spwlk__i = MAX_SPLIT; \ + while (spwlk__p) { \ + { action_macro(spwlk__i, (spwlk__p->sval)); } \ + spwlk__q = spwlk__p; \ + spwlk__p = spwlk__q->link; \ + if (free_list) \ + MAWK_ZFREE(MAWK, spwlk__q); \ + spwlk__i++; \ + } \ + cnt = MAX_SPLIT; \ + } \ +} while(0) + +/* walk items below MAX_SPLIT from the split buff, up to cnt; + cnt must not be larger than MAX_SPLIT! 
cnt is not modified + NOTE: this is half of the job mawk_split_walk does */ +#define mawk_split_walk_bottom(MAWK, cnt, action_macro) \ +do { \ + int spwlk__i; \ + for (spwlk__i = 0; spwlk__i < cnt; spwlk__i++) { \ + action_macro(spwlk__i, split_buff[spwlk__i]); \ + } \ +} while(0) + + diff --git a/src/libmawk/str.c b/src/libmawk/str.c new file mode 100644 index 0000000..0b556da --- /dev/null +++ b/src/libmawk/str.c @@ -0,0 +1,128 @@ + +/******************************************** +str.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + + +#include "mawk.h" +#include "scan.h" + +static const char hex_val['f' - 'A' + 1] = /* read-only */ +{ + 10, 11, 12, 13, 14, 15, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 10, 11, 12, 13, 14, 15 +}; + + +#define isoctal(x) ((x)>='0'&&(x)<='7') + +#define hex_value(x) hex_val[(x)-'A'] + +#define ishex(x) (MAWK->scan_code[x] == SC_DIGIT ||\ + ('A' <= (x) && (x) <= 'f' && hex_value(x))) + +static int octal(char **); +static int hex(mawk_state_t *, char **); + +/* process one , two or three octal digits + moving a pointer forward by reference */ +static int octal(char **start_p) +{ + register char *p = *start_p; + register unsigned x; + + x = *p++ - '0'; + if (isoctal(*p)) { + x = (x << 3) + *p++ - '0'; + if (isoctal(*p)) + x = (x << 3) + *p++ - '0'; + } + *start_p = p; + return x & 0xff; +} + +/* process one or two hex digits + moving a pointer forward by reference */ + +static int hex(mawk_state_t *MAWK, char **start_p) +{ + register unsigned char *p = (unsigned char *) *start_p; + register unsigned x; + unsigned t; + + if (MAWK->scan_code[*p] == 
SC_DIGIT) + x = *p++ - '0'; + else + x = hex_value(*p++); + + if (MAWK->scan_code[*p] == SC_DIGIT) + x = (x << 4) + *p++ - '0'; + else if ('A' <= *p && *p <= 'f' && (t = hex_value(*p))) { + x = (x << 4) + t; + p++; + } + + *start_p = (char *) p; + return x; +} + + +/* process the escape characters in a string, in place. */ +char *mawk_rm_escape(mawk_state_t *MAWK, char *s) +{ + register char *p, *q; + char *t; + int i; + + q = p = s; + + while (*p) { + if (*p == '\\') { + MAWK->escape_test[ET_END].in = *++p; /* sentinel */ + i = 0; + while (MAWK->escape_test[i].in != *p) + i++; + + if (i != ET_END) { /* in table */ + p++; + *q++ = MAWK->escape_test[i].out; + } + else if (isoctal(*p)) { + t = p; + *q++ = octal(&t); + p = t; + } + else if ((*p == 'x') && (ishex(*(unsigned char *) (p + 1)))) { + t = p + 1; + *q++ = hex(MAWK, &t); + p = t; + } + else if (*p == 0) /* can only happen with command line assign */ + *q++ = '\\'; + else { /* not an escape sequence */ + + *q++ = '\\'; + *q++ = *p++; + } + } + else + *q++ = *p++; + } + + *q = 0; + return s; +} diff --git a/src/libmawk/symtype.h b/src/libmawk/symtype.h new file mode 100644 index 0000000..079cea5 --- /dev/null +++ b/src/libmawk/symtype.h @@ -0,0 +1,118 @@ + +/******************************************** +symtype.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +/* types related to symbols are defined here */ + +#ifndef SYMTYPE_H +#define SYMTYPE_H + +/*--------------------------- + structures and types for arrays + *--------------------------*/ + +#include + +/* for parsing (i,j) in A */ +typedef struct { + int start; /* offset to code_base */ + int cnt; +} ARG2_REC; + +void mawk_add_to_fdump_list(mawk_state_t *, FBLOCK *); +int mawk_fdump(mawk_state_t * MAWK); + +/*------------------------- + elements of the symbol table + -----------------------*/ + +#define ST_NONE 0 +#define ST_VAR 1 +#define ST_KEYWORD 2 +#define ST_BUILTIN 3 /* a pointer to a builtin record */ +#define ST_ARRAY 4 /* a void * ptr to a mawk_hash table */ +#define ST_FIELD 5 /* a cell ptr to a field */ +#define ST_FUNCT 6 +#define ST_NR 7 /* NR is special */ +/* ST_ENV 8 used to be ENVIRON[] before virtual array support*/ +#define ST_LENGTH 9 /* ditto and mawk_bozo */ +#define ST_LOCAL_NONE 10 +#define ST_LOCAL_VAR 11 +#define ST_LOCAL_ARRAY 12 +#define ST_C_FUNCTION 13 /* call from the awk script to a c function - stores a function pointer */ + +#define is_local(stp) ((stp)->type>=ST_LOCAL_NONE) + + +/***************************** + structures for type checking function calls + ******************************/ + +typedef struct ca_rec { + struct ca_rec *link; + short type; + short arg_num; /* position in callee's stack */ +/*--------- this mawk_data only set if we'll need to patch -------*/ +/* happens if argument is an ID or type ST_NONE or ST_LOCAL_NONE */ + + int call_offset; +/* where the type is stored */ + SYMTAB *sym_p; /* if type is ST_NONE */ + char *type_p; /* if type is ST_LOCAL_NONE */ +} CA_REC; /* call argument record */ + +/* type field of CA_REC matches with ST_ types */ +#define CA_EXPR ST_LOCAL_VAR +#define CA_ARRAY ST_LOCAL_ARRAY + +struct fcall { + struct fcall *link; + FBLOCK *callee; + short call_scope; + short move_level; + FBLOCK *call; /* only used if call_scope == SCOPE_FUNCT 
*/ + INST *call_start; /* computed later as code may be moved */ + CA_REC *arg_list; + short arg_cnt_checked; + unsigned line_no; /* for error messages */ +}; + +extern FCALL_REC *resolve_list; + +void mawk_resolve_fcalls(mawk_state_t * MAWK); +void mawk_check_fcall(mawk_state_t *, FBLOCK *, int, int, FBLOCK *, CA_REC *, unsigned); +void mawk_relocate_resolve_list(mawk_state_t *MAWK, int, int, FBLOCK *, int, unsigned, int); + +/* mawk_hash.c */ +unsigned mawk_hash(const char *); + +/* register a symbol; name is not copied, the pointer is stored in the table! */ +SYMTAB *mawk_insert(mawk_state_t *, const char *name); + +#ifdef MAWK_MEM_PEDANTIC +/* remove a symbol */ +void mawk_delete(mawk_state_t *MAWK, const char *name, int cell_destroy); +#endif + +SYMTAB *mawk_find(mawk_state_t *, const char *, int); +extern const char *mawk_reverse_uk; +const char *mawk_reverse_find(mawk_state_t *, int, PTR); +SYMTAB *mawk_save_id(mawk_state_t * MAWK, const char *); +void mawk_restore_ids(mawk_state_t * MAWK); + +/* error.c */ +void mawk_type_error(mawk_state_t *, SYMTAB *); + +#endif /* SYMTYPE_H */ diff --git a/src/libmawk/test_3rd/Test.conf.in b/src/libmawk/test_3rd/Test.conf.in new file mode 100644 index 0000000..b9fb18a --- /dev/null +++ b/src/libmawk/test_3rd/Test.conf.in @@ -0,0 +1,11 @@ +# stop after the first test that fails +#STOP_WHEN_TEST_FAILS=1 + +# change which awk implementation to test +#AWK=mawk + +# change where vendor and the 3rd party tests are checked out +#TROOT_DIR=../../../../vendor/3rd_tests + +# change which tests to run +#TEST_FILES=g.*.sh diff --git a/src/libmawk/test_3rd/Test.sh b/src/libmawk/test_3rd/Test.sh new file mode 100755 index 0000000..9e9e3e3 --- /dev/null +++ b/src/libmawk/test_3rd/Test.sh @@ -0,0 +1,178 @@ +#!/bin/sh + +AWK=../lmawk +TROOT_DIR=../../../../vendor/3rd_tests + +if test -f ./Test.conf +then + . 
./Test.conf +fi + +if test -z "$TEST_FILES" +then + TEST_FILES=g.*.sh +fi + +if test -z "$TEST_GAWK_DEFAULT" -a -f gawk_default.list +then + TEST_GAWK_DEFAULT=`grep -v "^#" gawk_default.list` +fi + + +if test -z "$TEST_GAWK_ERROR" -a -f gawk_error.list +then + TEST_GAWK_ERROR=`grep -v "^#" gawk_error.list` +fi + +GAWK_DIR=$TROOT_DIR/gawk-4.1.1/test +BWK_DIR=$TROOT_DIR/bwk/test + +pass_cnt=0 +fail_cnt=0 +unkn_cnt=0 + +implementation=`basename $AWK` + +announce() +{ + announced=1 + if test "$1" = 0 + then + echo -n "pass" + pass_cnt=$(($pass_cnt + 1)) + else + echo -n "FAIL" + fail_cnt=$(($fail_cnt + 1)) + fail_list="$fail_list $2" + echo "$3" > "$2.diff" + if test ! -z "$STOP_WHEN_TEST_FAILS" + then + echo "" + echo "First failure, have to stop because STOP_WHEN_TEST_FAILS sais so." + exit 1 + fi + fi + if test ! -z "$4" -a "$1" = 0 + then + rm "$4" + fi +} + +pre_ann() +{ + announced=0 + echo -n "$n: " +} + +post_ann() +{ + if test "$announced" -gt 0 + then + echo "" + else + echo "???" + unkn_cnt=$(($unkn_cnt+1)) + fi +} + +gen_testname() +{ + testname=${n%%.sh} + testname=${testname##g.} + testname=${testname##t.} +} + +gawk_run_() +{ + if test -f "$GAWK_DIR/$1.in" + then + $AWK -v "SRCDIR=$GAWK_DIR" -v "srcdir=$GAWK_DIR" -f "$GAWK_DIR/$1.awk" <"$GAWK_DIR/$1.in" >"$1.out" 2>&1 + else + echo "" | $AWK -v "SRCDIR=$GAWK_DIR" -v "srcdir=$GAWK_DIR" -f "$GAWK_DIR/$1.awk" >"$1.out" 2>&1 + fi +} + +gawk_default_test() +{ + local okfile + gawk_run_ "$1" + if test -f "local.$implementation/$1.ok" + then + okfile="local.$implementation/$1.ok" + else + okfile="$GAWK_DIR/$1.ok" + fi + dif=`diff -u "$okfile" "$1.out"` + announce "$?" "$1" "$dif" "$1.out" +} + +gawk_error_test() +{ + local res err + gawk_run_ "$1" + if test "$?" == 0 + then + # should have returned false but it's true! 
+ res=1 + echo "" + echo "*** exit status 0 ***" >> "$1.out" + else + case $AWK in + *mawk*) + # it's false, but check for a compile-time error + err=`grep ": line [0-9]\+:" "$1.out"; grep ": run time error:" "$1.out"` + if test -z "$err" + then + res=1 + echo "*** doesn't look like a compile time or run time error ***" >> "$1.out" + else + res=0 + fi + ;; + *) res=0 ;; + esac + fi + announce "$res" "$1" "$dif" "$1.out" +} + +if test ! -z "$TEST_FILES" +then + for n in $TEST_FILES + do + gen_testname $n + pre_ann $n + . ./$n + post_ann + done +fi + +if test ! -z "$TEST_GAWK_DEFAULT" +then + for n_ in $TEST_GAWK_DEFAULT + do + n=g.$n_ + testname=$n_ + pre_ann + gawk_default_test $testname + post_ann + done +fi + +if test ! -z "$TEST_GAWK_ERROR" +then + for n_ in $TEST_GAWK_ERROR + do + n=g.$n_ + testname=$n_ + pre_ann + gawk_error_test $testname + post_ann + done +fi + +echo "================" +echo "All: $(($pass_cnt + $fail_cnt + $unkn_cnt)) pass: $pass_cnt ???: $unkn_cnt fail: $fail_cnt" +if test ! -z "$fail_list" +then + echo "Tests failed:$fail_list" +fi diff --git a/src/libmawk/test_3rd/g.argarray.sh b/src/libmawk/test_3rd/g.argarray.sh new file mode 100644 index 0000000..f4a7884 --- /dev/null +++ b/src/libmawk/test_3rd/g.argarray.sh @@ -0,0 +1,7 @@ +fn=argarray + +cp $GAWK_DIR/$fn.in $fn.in +echo "" | $AWK -f $GAWK_DIR/$fn.awk $fn.in >$fn.out 2>&1 +dif=`diff -u "local.$implementation/$fn.ok" "$fn.out"` +announce "$?" "$fn" "$dif" "$fn.out" +rm $fn.in diff --git a/src/libmawk/test_3rd/g.argtest.sh b/src/libmawk/test_3rd/g.argtest.sh new file mode 100644 index 0000000..2405d6b --- /dev/null +++ b/src/libmawk/test_3rd/g.argtest.sh @@ -0,0 +1,6 @@ +fn=argtest + +echo "" | $AWK -f $GAWK_DIR/$fn.awk $fn.in Makefile >$fn.out 2>&1 +dif=`diff -u "local.$implementation/$fn.ok" "$fn.out"` +announce "$?" 
"$fn" "$dif" "$fn.out" + diff --git a/src/libmawk/test_3rd/g.compare.sh b/src/libmawk/test_3rd/g.compare.sh new file mode 100644 index 0000000..8e6b871 --- /dev/null +++ b/src/libmawk/test_3rd/g.compare.sh @@ -0,0 +1,7 @@ +fn=compare + +cp $GAWK_DIR/$fn.in $fn.in +echo "" | $AWK -f $GAWK_DIR/$fn.awk WILL_BE_DELETED $fn.in $fn.in >$fn.out 2>&1 +dif=`diff -u "local.$implementation/$fn.ok" "$fn.out"` +announce "$?" "$fn" "$dif" "$fn.out" +rm $fn.in diff --git a/src/libmawk/test_3rd/g.devfd1.sh b/src/libmawk/test_3rd/g.devfd1.sh new file mode 100644 index 0000000..0c159dc --- /dev/null +++ b/src/libmawk/test_3rd/g.devfd1.sh @@ -0,0 +1,9 @@ +fn=devfd1 + +cp $GAWK_DIR/devfd.in1 $fn.in +$AWK -f $GAWK_DIR/$fn.awk $fn.in $fn.in 4<$GAWK_DIR/devfd.in4 5<$GAWK_DIR/devfd.in5 >$fn.out 2>&1 + + +dif=`diff -u "local.$implementation/$fn.ok" "$fn.out"` +announce "$?" "$fn" "$dif" "$fn.out" +rm $fn.in diff --git a/src/libmawk/test_3rd/g.exitval1.sh b/src/libmawk/test_3rd/g.exitval1.sh new file mode 100644 index 0000000..d5e76ca --- /dev/null +++ b/src/libmawk/test_3rd/g.exitval1.sh @@ -0,0 +1,7 @@ +fn=exitval1 + +echo "hello world" | $AWK -f $GAWK_DIR/$fn.awk $fn.in >$fn.out 2>&1 +echo "EXITVAL=$?" >> $fn.out +dif=`diff -u "local.$implementation/$fn.ok" "$fn.out"` +announce "$?" "$fn" "$dif" "$fn.out" + diff --git a/src/libmawk/test_3rd/g.fcall_exit.sh b/src/libmawk/test_3rd/g.fcall_exit.sh new file mode 100644 index 0000000..3083f5e --- /dev/null +++ b/src/libmawk/test_3rd/g.fcall_exit.sh @@ -0,0 +1,11 @@ +fn=fcall_exit + +echo "" >$fn.out +for i in 1 2 3 +do + echo "" | $AWK -f $GAWK_DIR/$fn.awk $i >>$fn.out 2>&1 + echo "EXITVAL=$?" >> $fn.out +done +dif=`diff -u "local.$implementation/$fn.ok" "$fn.out"` +announce "$?" 
"$fn" "$dif" "$fn.out" + diff --git a/src/libmawk/test_3rd/g.fcall_exit2.sh b/src/libmawk/test_3rd/g.fcall_exit2.sh new file mode 100644 index 0000000..f8dbd2b --- /dev/null +++ b/src/libmawk/test_3rd/g.fcall_exit2.sh @@ -0,0 +1,14 @@ +fn=fcall_exit2 + +echo "" >$fn.out +echo "oops" > 2 +for i in 1 2 +do + echo "*** $i" >> $fn.out + echo "" | $AWK -f $GAWK_DIR/$fn.awk $GAWK_DIR/$fn.in $i >>$fn.out 2>&1 + echo "EXITVAL=$?" >> $fn.out +done +dif=`diff -u "local.$implementation/$fn.ok" "$fn.out"` +announce "$?" "$fn" "$dif" "$fn.out" +rm 2 + diff --git a/src/libmawk/test_3rd/g.fsspcoln.sh b/src/libmawk/test_3rd/g.fsspcoln.sh new file mode 100644 index 0000000..57cbe1b --- /dev/null +++ b/src/libmawk/test_3rd/g.fsspcoln.sh @@ -0,0 +1,11 @@ +fn=fsspcoln + +echo "" >$fn.out +for fs in "[ :]" "[ :]+" "[ :]*" "A" +do + echo "*** FS=$fs" >>$fn.out + cat $GAWK_DIR/$fn.in | $AWK -f $GAWK_DIR/$fn.awk -F "$fs" >>$fn.out 2>&1 +done +dif=`diff -u "local.$implementation/$fn.ok" "$fn.out"` +announce "$?" "$fn" "$dif" "$fn.out" + diff --git a/src/libmawk/test_3rd/g.getline2.sh b/src/libmawk/test_3rd/g.getline2.sh new file mode 100644 index 0000000..75b6bf6 --- /dev/null +++ b/src/libmawk/test_3rd/g.getline2.sh @@ -0,0 +1,6 @@ +fn=getline2 + +echo "" | $AWK -f $GAWK_DIR/$fn.awk $GAWK_DIR/$fn.awk $GAWK_DIR/$fn.awk >$fn.out 2>&1 +dif=`diff -u "$GAWK_DIR/$fn.ok" "$fn.out"` +announce "$?" "$fn" "$dif" "$fn.out" + diff --git a/src/libmawk/test_3rd/g.iobug1.sh b/src/libmawk/test_3rd/g.iobug1.sh new file mode 100644 index 0000000..d1d3c97 --- /dev/null +++ b/src/libmawk/test_3rd/g.iobug1.sh @@ -0,0 +1,6 @@ +fn=iobug1 + +echo "" | $AWK -f $GAWK_DIR/$fn.awk >$fn.out 2>&1 +dif=`diff -u "$GAWK_DIR/$fn.ok" "$fn.out"` +announce "$?" 
"$fn" "$dif" "$fn.out" + diff --git a/src/libmawk/test_3rd/g.manyfiles.sh b/src/libmawk/test_3rd/g.manyfiles.sh new file mode 100644 index 0000000..951fe21 --- /dev/null +++ b/src/libmawk/test_3rd/g.manyfiles.sh @@ -0,0 +1,20 @@ +fn=manyfiles + +mkdir junk + +echo "*** stdout:" >$fn.out +echo "foo bar +baz BAZ" | $AWK -f $GAWK_DIR/$fn.awk >>$fn.out 2>&1 + +for n in foo baz +do + echo "*** junk/$n:" + cat junk/$n + rm junk/$n +done >>$fn.out + +dif=`diff -u "local.$implementation/$fn.ok" "$fn.out"` + +announce "$?" "$fn" "$dif" "$fn.out" +rmdir junk + diff --git a/src/libmawk/test_3rd/g.messages.sh b/src/libmawk/test_3rd/g.messages.sh new file mode 100644 index 0000000..06cfcba --- /dev/null +++ b/src/libmawk/test_3rd/g.messages.sh @@ -0,0 +1,10 @@ +fn=messages + +echo "*** stdout: " >$fn.out +echo "" | $AWK -f $GAWK_DIR/$fn.awk $fn.in >>$fn.out 2>$fn.fd2 +(echo "*** stderr: "; cat $fn.fd2) >>$fn.out +(echo "*** _out1: "; cat _out1) >>$fn.out + +dif=`diff -u "local.$implementation/$fn.ok" "$fn.out"` +announce "$?" "$fn" "$dif" "$fn.out" +rm $fn.fd2 _out1 diff --git a/src/libmawk/test_3rd/gawk_default.list b/src/libmawk/test_3rd/gawk_default.list new file mode 100644 index 0000000..6930337 --- /dev/null +++ b/src/libmawk/test_3rd/gawk_default.list @@ -0,0 +1,187 @@ +# *TODO means it's already in the central TODO list with references to test_3rd +# +addcomma +anchgsub +arrayprm2 +arrayprm3 +arrayref +arrymem1 +arryref2 +arynasty +arynocls +aryprm8 +arysubnm +asgext +back89 +backbigs1 +childin +clobber +closebad +clsflnam +compare2 +concat1 +concat2 +concat3 +concat4 +convfmt +delargv +delarpm2 +delarprm +# delsub TODO: check if we need to reimplement this +# dfastress TODO: fails badly +dynlj +eofsplit +exit2 +exitval2 +fldchg +fldchgnf +# fmtspcl TODO: figure out what to do with the NaNs +fmttest +fnarydel +fnparydl +fordel +forref +fsbs +fsfwfs +fsrs +fstabplus +# funlen *TODO: check posix: should length(array) work? 
+funsemnl +# getline *TODO: concat precedence over IO +getline3 +getline4 +getline5 +getlnbuf +getlndir +getlnhd +getnr2tb +getnr2tm +gsubtest +gsubtst2 +# gsubtst3 TODO: ??? +# gsubtst4 TODO: extended regex? +gsubtst5 +gsubtst7 +gsubtst8 +hello +hsprint +igncdym +inftest +# inputred concat precedence over IO: it is implementation defined by POSIX, portable awk programs shall use () +intest +intprec +# jarebug *TODO: regex on binary +leadnl +litoct +longsub +manglprm +math +# mbprintf1 TODO: lmawk binary +mbprintf2 +mbprintf3 +# mbprintf4 TODO: lmawk binary +mbstr1 +membug1 +minusstr +mpfrnegzero +mtchi18n +nasty +nasty2 +negexp +negrange +nested +nfldstr +nfloop +nfset +nlfldsep +nlinstr +nlstrina +noloop1 +noloop2 +nonl +# nulrsend TODO: POSIX: RS null = separate by blank line +numindex +numsubstr +octsub +ofmt +ofmta +ofmtfidl +ofmts +# ofs1 TODO: mawk doesn't respect FS or OFS? +onlynl +opasnidx +opasnslf +paramtyp +paramuninitglobal +parse1 +pcntplus +pid +pipeio1 +pipeio2 +posix +posix2008sub +prdupval +prec +# printf0 *TODO: printf with no arg - UB? +printf1 +printfbad3 +# printfloat TODO: very strange format breaks +prmreuse +prt1eval +prtoeval +rand +range1 +rebt8b1 +# rebt8b2 TODO: binary? +rebuf +redfilnm +regeq +regexprange +# regx8bit *TODO: binary regex? +# reindops *TODO: regex "^+" ; should we interpret the + as a literal? +reparse +resplit +rri1 +rs +rsnul1nl +rsstart1 +rstest1 +rstest2 +rstest3 +rstest4 +rstest5 +rstest6 +rswhite +shadow +splitargv +splitarr +splitdef +splitvar +splitwht +sprintfc +strcat1 +strnum1 +subamp +subi18n +subsepnm +subslash +substr +swaplns +tradanch +tweakfld +uninit2 +uninit3 +uninit4 +# uninit5 *TODO: length() on array? 
+uninitialized +# uparrfs *TODO: ^ in FS +wideidx +wideidx2 +widesub +widesub2 +widesub3 +wjposer1 +# xref regex [:alnum:] +zero2 +zeroe0 +zeroflag diff --git a/src/libmawk/test_3rd/gawk_error.list b/src/libmawk/test_3rd/gawk_error.list new file mode 100644 index 0000000..6a3aad2 --- /dev/null +++ b/src/libmawk/test_3rd/gawk_error.list @@ -0,0 +1,39 @@ +arrayparm +arryref3 +arryref4 +arryref5 +aryprm1 +aryprm2 +aryprm3 +aryprm4 +aryprm5 +aryprm6 +aryprm7 +badassign1 +defref +delfunc +fnamedat +fnarray +fnarray2 +fnaryscl +fnasgnm +fnmisc +funsmnam +gsubasgn +nastyparm +nfneg +nofmtch +noparms +paramdup +paramres +parseme +printfbad1 +printfbad2 +prmarscl +scalar +sclforin +sclifin +synerr1 +synerr2 +unterm + diff --git a/src/libmawk/test_3rd/local.lmawk/argarray.ok b/src/libmawk/test_3rd/local.lmawk/argarray.ok new file mode 100644 index 0000000..7a4e107 --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/argarray.ok @@ -0,0 +1,7 @@ +here we have 2 arguments +which are + lmawk + argarray.in +Environment variable TEST= +and the current input file is called "" +in main loop, this input file is known as "argarray.in" diff --git a/src/libmawk/test_3rd/local.lmawk/argtest.ok b/src/libmawk/test_3rd/local.lmawk/argtest.ok new file mode 100644 index 0000000..898fdc3 --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/argtest.ok @@ -0,0 +1,3 @@ +ARGV[0] = lmawk +ARGV[1] = argtest.in +ARGV[2] = Makefile diff --git a/src/libmawk/test_3rd/local.lmawk/backgsub.ok b/src/libmawk/test_3rd/local.lmawk/backgsub.ok new file mode 100644 index 0000000..2d3f17f --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/backgsub.ok @@ -0,0 +1 @@ +\x\y\z diff --git a/src/libmawk/test_3rd/local.lmawk/clsflnam.ok b/src/libmawk/test_3rd/local.lmawk/clsflnam.ok new file mode 100644 index 0000000..cefd250 --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/clsflnam.ok @@ -0,0 +1 @@ +Error `' closing input file diff --git a/src/libmawk/test_3rd/local.lmawk/compare.ok 
b/src/libmawk/test_3rd/local.lmawk/compare.ok new file mode 100644 index 0000000..965f25d --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/compare.ok @@ -0,0 +1,6 @@ +1 +2 +zero +1 +1 +0 1 diff --git a/src/libmawk/test_3rd/local.lmawk/delargv.ok b/src/libmawk/test_3rd/local.lmawk/delargv.ok new file mode 100644 index 0000000..e2d96e9 --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/delargv.ok @@ -0,0 +1,3 @@ +length of ARGV[0] is 5 +length of ARGV[1] is 9 +length of ARGV[3] is 9 diff --git a/src/libmawk/test_3rd/local.lmawk/devfd1.ok b/src/libmawk/test_3rd/local.lmawk/devfd1.ok new file mode 100644 index 0000000..14f8f2e --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/devfd1.ok @@ -0,0 +1,2 @@ +file on fd 4 +file on fd 5 diff --git a/src/libmawk/test_3rd/local.lmawk/exitval1.ok b/src/libmawk/test_3rd/local.lmawk/exitval1.ok new file mode 100644 index 0000000..d2380ee --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/exitval1.ok @@ -0,0 +1 @@ +EXITVAL=0 diff --git a/src/libmawk/test_3rd/local.lmawk/fcall_exit.ok b/src/libmawk/test_3rd/local.lmawk/fcall_exit.ok new file mode 100644 index 0000000..a1ef49f --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/fcall_exit.ok @@ -0,0 +1,7 @@ + +true(1, 1, crash()) => crash properly. +EXITVAL=1 +true(1, crash(), 1) => do not crash properly. +EXITVAL=1 +true(1, crash()) => do not crash properly. +EXITVAL=1 diff --git a/src/libmawk/test_3rd/local.lmawk/fcall_exit2.ok b/src/libmawk/test_3rd/local.lmawk/fcall_exit2.ok new file mode 100644 index 0000000..b9ac26a --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/fcall_exit2.ok @@ -0,0 +1,7 @@ + +*** 1 + true(1, crash()) => crash properly. +EXITVAL=1 +*** 2 + true(1, crash()) => do not crash properly. 
+EXITVAL=1 diff --git a/src/libmawk/test_3rd/local.lmawk/fsspcoln.ok b/src/libmawk/test_3rd/local.lmawk/fsspcoln.ok new file mode 100644 index 0000000..0f794f9 --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/fsspcoln.ok @@ -0,0 +1,9 @@ + +*** FS=[ :] +b +*** FS=[ :]+ +b +*** FS=[ :]* +b +*** FS=A + diff --git a/src/libmawk/test_3rd/local.lmawk/getlndir.ok b/src/libmawk/test_3rd/local.lmawk/getlndir.ok new file mode 100644 index 0000000..a1170eb --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/getlndir.ok @@ -0,0 +1,2 @@ +lmawk: read error (Is a directory) +4, 0, Is a directory diff --git a/src/libmawk/test_3rd/local.lmawk/hello.ok b/src/libmawk/test_3rd/local.lmawk/hello.ok new file mode 100644 index 0000000..e965047 --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/hello.ok @@ -0,0 +1 @@ +Hello diff --git a/src/libmawk/test_3rd/local.lmawk/inftest.ok b/src/libmawk/test_3rd/local.lmawk/inftest.ok new file mode 100644 index 0000000..a26f3e0 --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/inftest.ok @@ -0,0 +1,105 @@ +100000 100 +100000000 100000 +1e+11 100000000 +1e+14 1e+11 +1e+17 1e+14 +1e+20 1e+17 +1e+23 1e+20 +1e+26 1e+23 +1e+29 1e+26 +1e+32 1e+29 +1e+35 1e+32 +1e+38 1e+35 +1e+41 1e+38 +1e+44 1e+41 +1e+47 1e+44 +1e+50 1e+47 +1e+53 1e+50 +1e+56 1e+53 +1e+59 1e+56 +1e+62 1e+59 +1e+65 1e+62 +1e+68 1e+65 +1e+71 1e+68 +1e+74 1e+71 +1e+77 1e+74 +1e+80 1e+77 +1e+83 1e+80 +1e+86 1e+83 +1e+89 1e+86 +1e+92 1e+89 +1e+95 1e+92 +1e+98 1e+95 +1e+101 1e+98 +1e+104 1e+101 +1e+107 1e+104 +1e+110 1e+107 +1e+113 1e+110 +1e+116 1e+113 +1e+119 1e+116 +1e+122 1e+119 +1e+125 1e+122 +1e+128 1e+125 +1e+131 1e+128 +1e+134 1e+131 +1e+137 1e+134 +1e+140 1e+137 +1e+143 1e+140 +1e+146 1e+143 +1e+149 1e+146 +1e+152 1e+149 +1e+155 1e+152 +1e+158 1e+155 +1e+161 1e+158 +1e+164 1e+161 +1e+167 1e+164 +1e+170 1e+167 +1e+173 1e+170 +1e+176 1e+173 +1e+179 1e+176 +1e+182 1e+179 +1e+185 1e+182 +1e+188 1e+185 +1e+191 1e+188 +1e+194 1e+191 +1e+197 1e+194 +1e+200 1e+197 +1e+203 1e+200 
+1e+206 1e+203 +1e+209 1e+206 +1e+212 1e+209 +1e+215 1e+212 +1e+218 1e+215 +1e+221 1e+218 +1e+224 1e+221 +1e+227 1e+224 +1e+230 1e+227 +1e+233 1e+230 +1e+236 1e+233 +1e+239 1e+236 +1e+242 1e+239 +1e+245 1e+242 +1e+248 1e+245 +1e+251 1e+248 +1e+254 1e+251 +1e+257 1e+254 +1e+260 1e+257 +1e+263 1e+260 +1e+266 1e+263 +1e+269 1e+266 +1e+272 1e+269 +1e+275 1e+272 +1e+278 1e+275 +1e+281 1e+278 +1e+284 1e+281 +1e+287 1e+284 +1e+290 1e+287 +1e+293 1e+290 +1e+296 1e+293 +1e+299 1e+296 +1e+302 1e+299 +1e+305 1e+302 +1e+308 1e+305 +inf 1e+308 +inf inf +loop terminated diff --git a/src/libmawk/test_3rd/local.lmawk/manyfiles.ok b/src/libmawk/test_3rd/local.lmawk/manyfiles.ok new file mode 100644 index 0000000..d29a9df --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/manyfiles.ok @@ -0,0 +1,5 @@ +*** stdout: +*** junk/foo: +bar +*** junk/baz: +BAZ diff --git a/src/libmawk/test_3rd/local.lmawk/messages.ok b/src/libmawk/test_3rd/local.lmawk/messages.ok new file mode 100644 index 0000000..ffb6609 --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/messages.ok @@ -0,0 +1,7 @@ +*** stdout: +Normal print statement +This printed on stdout +*** stderr: +You blew it! 
+*** _out1: +Goes to a file out1 diff --git a/src/libmawk/test_3rd/local.lmawk/nonl.ok b/src/libmawk/test_3rd/local.lmawk/nonl.ok new file mode 100644 index 0000000..e69de29 diff --git a/src/libmawk/test_3rd/local.lmawk/rand.ok b/src/libmawk/test_3rd/local.lmawk/rand.ok new file mode 100644 index 0000000..9c0a546 --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/rand.ok @@ -0,0 +1 @@ + 40 27 38 60 66 15 4 4 81 16 56 50 11 17 59 2 3 16 21 diff --git a/src/libmawk/test_3rd/local.lmawk/shadow.ok b/src/libmawk/test_3rd/local.lmawk/shadow.ok new file mode 100644 index 0000000..86e041d --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/shadow.ok @@ -0,0 +1,3 @@ +foo +bar +baz diff --git a/src/libmawk/test_3rd/local.lmawk/uninit2.ok b/src/libmawk/test_3rd/local.lmawk/uninit2.ok new file mode 100644 index 0000000..6ed281c --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/uninit2.ok @@ -0,0 +1,2 @@ +1 +1 diff --git a/src/libmawk/test_3rd/local.lmawk/uninit3.ok b/src/libmawk/test_3rd/local.lmawk/uninit3.ok new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/uninit3.ok @@ -0,0 +1 @@ + diff --git a/src/libmawk/test_3rd/local.lmawk/uninit4.ok b/src/libmawk/test_3rd/local.lmawk/uninit4.ok new file mode 100644 index 0000000..3f2ff2d --- /dev/null +++ b/src/libmawk/test_3rd/local.lmawk/uninit4.ok @@ -0,0 +1,5 @@ + + + + + diff --git a/src/libmawk/test_3rd/local.lmawk/uninitialized.ok b/src/libmawk/test_3rd/local.lmawk/uninitialized.ok new file mode 100644 index 0000000..e69de29 diff --git a/src/libmawk/types.h b/src/libmawk/types.h new file mode 100644 index 0000000..8bc5b2e --- /dev/null +++ b/src/libmawk/types.h @@ -0,0 +1,110 @@ + +/******************************************** +types.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. 
+ +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#ifndef MAWK_TYPES_H +#define MAWK_TYPES_H + +#include +#include + +/* mawk_cell_t types */ + +typedef enum { + C_NOINIT = 0, +#ifdef CELLDEBUG +/* catch invalid access */ + C_FREED = 1, +#endif + + C_NUM = 2, + C_ARR_REF = 3, /* array reference when using with WRARR: two adjacent cells represent index and array (on the stack, normally) */ + C_ARR_REF_BT=4, /* array reference for bifunct_target references: it is a single cell with ptr pointing to the array and d.idx_cell holding the index */ + + C_EXE_STTYPE=5, + C_EXE_STATE=6, + C_REQ_NOMORE=7, + C_REQ_CALL=8, + +/* #### WARNING #### anything that is >= C_STRING is treated as a string: + refco, free(), etc. */ + C_STRING = 16, + C_STRNUM = 17, + C_MBSTRN = 18, /*could be STRNUM, has not been checked */ + C_RE = 19, + C_SPACE = 20, /* split on space */ + C_SNULL = 21, /* split on the empty string */ + C_REPL = 22, /* a replacement string '\&' changed to & */ + C_REPLV = 23, /* a vector replacement -- broken on & */ + NUM_CELL_TYPES +} mawk_celltype_t; + +/* these defines are used to check types for two + CELLs which are adjacent in memory */ +#define TWO_NOINITS (2*(1<ptr) + +typedef struct mawk_cell_s mawk_cell_t; + +struct mawk_cell_s { + mawk_celltype_t type; + PTR ptr; /* payload 1 - can not be in payload 2 because of STRNUMs that use both ptr for string and d.dval for num at the same time */ + union { /* payload 2 */ + mawk_num_t dval; /* number */ + int vcnt; /* regex lib: only used if type == C_REPLV */ + mawk_cell_t *idx_cell; /* zmalloc'd index for bifunct_target array refs */ + } d; +}; + + +/* all builtins are passed the evaluation stack pointer and + return its new value, here is the type */ + +#include "mawk.h" +typedef mawk_cell_t *(*PF_CP) (mawk_state_t *, mawk_cell_t *); + +/* an element of code (instruction) */ +typedef union { + 
unsigned long op; /* must be unsigned and at least 32 bits for da_bin */ + PTR ptr; +} INST; + +#endif /* MAWK_TYPES_H */ diff --git a/src/libmawk/vargs.h b/src/libmawk/vargs.h new file mode 100644 index 0000000..0e1c257 --- /dev/null +++ b/src/libmawk/vargs.h @@ -0,0 +1,51 @@ + +/******************************************** +vargs.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1992 Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +/* provides common interface to or + only used for error messages +*/ + +#if NO_STDARG_H +#include + +#ifdef VA_ALIST +#define VA_ALIST(mawk_state_t *MAWK; type arg) (va_alist) va_dcl +#define VA_ALIST2(mawk_state_t *MAWK; t1 a1 ; t2 a2) (va_alist) va_dcl +#endif + +#define VA_START(p,type, last) va_start(p) ;\ + MAWK = va_arg(p,mawk_state_t *); \ + last = va_arg(p,type) + + +#define VA_START2(p,t1,a1,t2,a2) va_start(p) ;\ + MAWK = va_arg(p,mawk_state_t *); \ + a1 = va_arg(p,t1);\ + a2 = va_arg(p,t2) + +#else /* have stdarg.h */ +#include + +#ifndef VA_ALIST +#define VA_ALIST(type, arg) (mawk_state_t *MAWK, type arg, ...) +#define VA_ALIST2(t1,a1,t2,a2) (mawk_state_t *MAWK, t1 a1,t2 a2,...) +#endif + +#define VA_START(p,type,last) va_start(p,last) + +#define VA_START2(p,t1,a1,t2,a2) va_start(p,a2) + +#endif diff --git a/src/libmawk/vars.c b/src/libmawk/vars.c new file mode 100644 index 0000000..88d6469 --- /dev/null +++ b/src/libmawk/vars.c @@ -0,0 +1,70 @@ +/******************************************** +vars.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. 
+ +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#include "vars.h" +#include "symtype.h" +#include "zmalloc.h" +#include "cell.h" + +mawk_cell_t *mawk_get_var(mawk_state_t *MAWK, const char *vname) +{ + SYMTAB *fs; + + fs = mawk_find(MAWK, vname, 0); + + /* does symbol exist at all? */ + if (fs == NULL) + return NULL; + + /* return it if it is a variable */ + if ((fs->type == ST_VAR) || (fs->type == ST_NR) || (fs->type == ST_FIELD) || (fs->type == ST_ARRAY)) + return fs->stval.cp; + + return NULL; +} + +mawk_cell_t *mawk_create_var(mawk_state_t *MAWK, const char *name, mawk_cell_t **fp) +{ + SYMTAB *stp; + mawk_cell_t *cp; + static mawk_cell_t cell; /* used if command line assign to pseudo field */ + + stp = mawk_find(MAWK, name, 1); + + switch (stp->type) { + case ST_NONE: + stp->type = ST_VAR; + stp->stval.cp = cp = MAWK_ZMALLOC(MAWK, mawk_cell_t); + break; + + case ST_VAR: + case ST_NR: /* !! no one will do this */ + cp = stp->stval.cp; + mawk_cell_destroy(MAWK, cp); + break; + + case ST_FIELD: + if (fp == NULL) + return NULL; + /* must be pseudo field */ + *fp = stp->stval.cp; + cp = &cell; + break; + + default: + return NULL; + } + return cp; +} diff --git a/src/libmawk/vars.h b/src/libmawk/vars.h new file mode 100644 index 0000000..ebd5dcd --- /dev/null +++ b/src/libmawk/vars.h @@ -0,0 +1,24 @@ +/******************************************** +vars.h + +libmawk changes (C) 2009-2012, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +#include "mawk.h" + +/* look up an existing variable */ +mawk_cell_t *mawk_get_var(mawk_state_t *MAWK, const char *vname); + +/* create a variable and return mawk_cell_t; overwrite/clear existing variable; + return NULL if an object that can not be overwritten already exist on this + name. fp should be NULL if field assignment is not needed */ +mawk_cell_t *mawk_create_var(mawk_state_t *MAWK, const char *name, mawk_cell_t **fp); diff --git a/src/libmawk/version.c b/src/libmawk/version.c new file mode 100644 index 0000000..7d9a8c6 --- /dev/null +++ b/src/libmawk/version.c @@ -0,0 +1,47 @@ + +/******************************************** +version.c + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991-95. Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#include "mawk.h" +#include "vio_orig.h" + +/* mawk 1.3 */ +#define PATCHLEVEL 3 +#define PATCH_STRING ".3" +#define DATE_STRING "Nov 1996" +#define MAWK_ID "@(#)mawk 1.3.3" + +#define VERSION_STRING \ + "lmawk " LMAWK_VER ", Copyright (C) Tibor 'Igor2' Palinkas\n" \ + " (http://repo.hu/projects/libmawk; email: libmawk (at) igor2.repo.hu)\n" \ + " based on mawk 1.3%s %s, Copyright (C) Michael D. 
Brennan\n\n" + + +static const char fmt[] = "%-14s%10lu\n"; + +/* print VERSION and exit */ +void mawk_print_version(mawk_state_t * MAWK) +{ + mawk_vio_orig_setup_stdio(MAWK, 0, 1, 1); + + MAWK->fnode_stdout->vf->imp->vprintf(MAWK, MAWK->fnode_stdout->vf, VERSION_STRING, PATCH_STRING, DATE_STRING); + mawk_vio_flush(MAWK, MAWK->fnode_stdout->vf); + + MAWK->fnode_stdout->vf->imp->vprintf(MAWK, MAWK->fnode_stderr->vf, "compiled limits:\n"); + MAWK->fnode_stdout->vf->imp->vprintf(MAWK, MAWK->fnode_stderr->vf, fmt, "max NF", (long) MAX_FIELD); + MAWK->fnode_stdout->vf->imp->vprintf(MAWK, MAWK->fnode_stderr->vf, fmt, "sprintf buffer", (long) SPRINTF_SZ); + exit(0); +} + diff --git a/src/libmawk/version.h b/src/libmawk/version.h new file mode 100644 index 0000000..b75ea33 --- /dev/null +++ b/src/libmawk/version.h @@ -0,0 +1,3 @@ +/* The actual version information is #defined in Makefile.conf.in */ + +extern void mawk_print_version(mawk_state_t * MAWK); diff --git a/src/libmawk/vio.h b/src/libmawk/vio.h new file mode 100644 index 0000000..af55c81 --- /dev/null +++ b/src/libmawk/vio.h @@ -0,0 +1,94 @@ +#ifndef MAWK_VIO_H +#define MAWK_VIO_H +/* virtual I/O */ +typedef enum mawk_vio_open_mode_e { + MAWK_VIO_O_TRUNC, + MAWK_VIO_O_APPEND, + MAWK_VIO_I +} mawk_vio_open_mode_t; + +/**** init-type entry points: used to create a new vio or do a one-time-thing + without an existing vio; MAWK->vio_init.* point to the defaults + but the application is free to create a vf using a different + implementation directly ****/ +/* open a normal file */ +typedef mawk_vio_t *mawk_vio_open_t(mawk_state_t *MAWK, const char *name, mawk_vio_open_mode_t mode); + +/* open and close pipe (background process) */ +typedef mawk_vio_t *mawk_vio_open_pipe_t(mawk_state_t *MAWK, const char *name, int type); + +/* execute a shell command (system()-like wrapper around execl()) */ +typedef void mawk_vio_exec_shell_t(mawk_state_t *MAWK, const char *cmd); + + +/**** regular vio calls on existing vios - vf->imp 
points to these ****/ + +/* write data to a vio * */ +typedef int mawk_vio_putc_t(mawk_state_t *MAWK, mawk_vio_t *vf, char c); +typedef int mawk_vio_write_str_t(mawk_state_t *MAWK, mawk_vio_t *vf, const char *str); +typedef int mawk_vio_write_t(mawk_state_t *MAWK, mawk_vio_t *vf, const char *data, int len); +typedef int mawk_vio_printf_t(mawk_state_t *MAWK, mawk_vio_t *vf, const char *fmt, ...); + +/* return -2 if there's no more input at the moment */ +typedef int mawk_vio_read_t(mawk_state_t *MAWK, mawk_vio_t *f, char *dst, long int size); + + +typedef int mawk_vio_close_t(mawk_state_t *MAWK, mawk_vio_t *vf); +typedef int mawk_vio_flush_t(mawk_state_t *MAWK, mawk_vio_t *vf); + +/* check the error state of a file node (ferror()-style) */ +typedef int mawk_vio_error_t(mawk_state_t *MAWK, mawk_vio_t *vf); + +/* mark the file no-close; this is useful for working on files that are + not really open by the vio implementation, for example stdout/stderr + in vio_orig. */ +typedef void mawk_vio_mark_no_close_t(mawk_state_t *MAWK, mawk_vio_t *vf); + +/* stdio hacks */ + +/* per file hooks */ +typedef struct mawk_vio_imp_s { + mawk_vio_putc_t *vputc; + mawk_vio_write_str_t *vwrite_str; + mawk_vio_write_t *vwrite; + mawk_vio_printf_t *vprintf; + mawk_vio_read_t *vread; + mawk_vio_close_t *vclose; + mawk_vio_flush_t *vflush; + mawk_vio_error_t *error; + mawk_vio_mark_no_close_t *mark_no_close; +} mawk_vio_imp_t; + +/* per file struct */ +struct mawk_vio_s { + /* common header */ + const mawk_vio_imp_t *imp; + int refco; /* how many times this vf is open (and referenced from the FILE_NODEs) */ + /* implementations may have further fields here, never depend on sizeof(mawk_vio_t)! 
*/ +}; + +/* per script instance hooks */ +typedef struct mawk_vio_init_s { + mawk_vio_open_t *vopen; + mawk_vio_open_pipe_t *vopen_pipe; + mawk_vio_exec_shell_t *exec_shell; + int inited; +} mawk_vio_init_t; + +#define mawk_vio_putc(MAWK, vf, c) (vf)->imp->vputc(MAWK, (vf), (c)) +#define mawk_vio_write_str(MAWK, vf, str) (vf)->imp->vwrite_str(MAWK, (vf), (str)) +#define mawk_vio_write(MAWK, vf, data, len) (vf)->imp->vwrite(MAWK, (vf), (data), (len)) +/* can not be implemented without C99 vararg macros: + #define mawk_vio_printf_t(MAWK, vf, const char *fmt, ...); */ +#define mawk_vio_read(MAWK, vf, dst, size) (vf)->imp->vread(MAWK, (vf), (dst), (size)) +#define mawk_vio_close(MAWK, vf) ((((vf) == NULL) || ((vf)->imp->vclose == NULL)) ? (-1) : ((vf)->imp->vclose(MAWK, (vf)))) +#define mawk_vio_flush(MAWK, vf) (vf)->imp->vflush(MAWK, (vf)) +#define mawk_vio_error(MAWK, vf) (vf)->imp->error(MAWK, (vf)) + +/* default init (entry) hooks */ +#define mawk_vio_open(MAWK, name, mode) MAWK->vio_init.vopen(MAWK, (name), (mode)) +#define mawk_vio_open_pipe(MAWK, name, type) MAWK->vio_init.vopen_pipe(MAWK, (name), (type)) +#define mawk_vio_exec_shell(MAWK, cmd) MAWK->vio_init.exec_shell(MAWK, (cmd)) +#define mawk_vio_setup_stdouts(MAWK) MAWK->vio_init.mawk_vio_setup_stdouts(MAWK) + +#endif diff --git a/src/libmawk/vio_fifo.c b/src/libmawk/vio_fifo.c new file mode 100644 index 0000000..67101ea --- /dev/null +++ b/src/libmawk/vio_fifo.c @@ -0,0 +1,177 @@ +/******************************************** +libmawk (C) 2014, Tibor 'Igor2' Palinkas; + +This is a source file for libmawk, an implementation of +the AWK programming language, fork of mawk. + +Libmawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ +#include "vio_fifo.h" +#include "memory.h" + +mawk_vio_t *mawk_vio_fifo_open(mawk_state_t *MAWK, const char *name, mawk_vio_open_mode_t mode) +{ + mawk_vio_fifo_t *v; + v = mawk_zmalloc(MAWK, sizeof(mawk_vio_fifo_t)); + v->eof_from_awk = 0; + v->eof_from_app = 0; + v->vio_common_head.imp = &mawk_vio_fifo_imp; + v->vio_common_head.refco = 0; + mawk_zfifo_alloc(MAWK, &(v->fifo), -1); + switch(mode) { + case MAWK_VIO_O_TRUNC: + case MAWK_VIO_O_APPEND: + v->is_awk2app = 1; + break; + case MAWK_VIO_I: + v->is_awk2app = 0; + break; + } + return (mawk_vio_t *)v; +} + +/* write data to a vio * */ +int mawk_vio_fifo_putc(mawk_state_t *MAWK, mawk_vio_t *vf, char c) +{ + mawk_vio_fifo_t *v = (mawk_vio_fifo_t *)vf; + if (!v->is_awk2app) + return -1; + return mawk_zfifo_write(MAWK, &(v->fifo), &c, 1); +} + +int mawk_vio_fifo_write_str(mawk_state_t *MAWK, mawk_vio_t *vf, const char *str) +{ + mawk_vio_fifo_t *v = (mawk_vio_fifo_t *)vf; + int len = strlen(str); + + if (!v->is_awk2app) + return -1; + + return mawk_zfifo_write(MAWK, &(v->fifo), str, len); +} + +int mawk_vio_fifo_write_app(mawk_state_t *MAWK, mawk_vio_t *vf, const char *data, int len) +{ + mawk_vio_fifo_t *v = (mawk_vio_fifo_t *)vf; + if (v->is_awk2app) + return -1; + return mawk_zfifo_write(MAWK, &(v->fifo), data, len); +} + +int mawk_vio_fifo_write(mawk_state_t *MAWK, mawk_vio_t *vf, const char *data, int len) +{ + mawk_vio_fifo_t *v = (mawk_vio_fifo_t *)vf; + if (!v->is_awk2app) + return -1; + return mawk_zfifo_write(MAWK, &(v->fifo), data, len); +} + +int mawk_vio_fifo_printf(mawk_state_t *MAWK, mawk_vio_t *vf, const char *fmt, ...) 
+{ + mawk_vio_fifo_t *v = (mawk_vio_fifo_t *)vf; + if (!v->is_awk2app) + return -1; +#warning TODO + abort(); + return -1; +} + + +int mawk_vio_fifo_read(mawk_state_t *MAWK, mawk_vio_t *vf, char *dst, long int size) +{ + mawk_vio_fifo_t *v = (mawk_vio_fifo_t *)vf; + int len; + + if (v->is_awk2app) + return -1; + + len = mawk_zfifo_read(MAWK, &(v->fifo), dst, size); + if (len == 0) { + /* there's no more data and the app has finished: pass on eof */ + if (v->eof_from_app) + return 0; + + /* there's no more data at the moment, but the app didn't say eof yet */ + return -2; + } + + return len; +} + +int mawk_vio_fifo_read_app(mawk_state_t *MAWK, mawk_vio_t *vf, char *dst, long int size) +{ + mawk_vio_fifo_t *v = (mawk_vio_fifo_t *)vf; + int len; + + if (!v->is_awk2app) + return -1; + + len = mawk_zfifo_read(MAWK, &(v->fifo), dst, size); + if (len == 0) { + /* there's no more data and the app has finished: pass on eof */ + if (v->eof_from_awk) + return 0; + + /* there's no more data at the moment, but the app didn't say eof yet */ + return -2; + } + + return len; +} + +static void close_on_eof(mawk_state_t *MAWK, mawk_vio_fifo_t *v) +{ + if ((v->eof_from_app) && (v->eof_from_awk)) { + mawk_zfifo_free(MAWK, &(v->fifo)); + mawk_zfree(MAWK, v, sizeof(mawk_vio_fifo_t)); + } +} + + +int mawk_vio_fifo_close(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + mawk_vio_fifo_t *v = (mawk_vio_fifo_t *)vf; + + v->eof_from_awk = 1; + close_on_eof(MAWK, v); + return 0; +} + +int mawk_vio_fifo_eof_from_app(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + mawk_vio_fifo_t *v = (mawk_vio_fifo_t *)vf; + + v->eof_from_app = 1; + close_on_eof(MAWK, v); + return 0; +} + +int mawk_vio_fifo_flush(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + /* nothing to do on flush */ + return 0; +} + +int mawk_vio_fifo_error(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + return (vf == NULL); +} + +void mawk_vio_fifo_mark_no_close(mawk_state_t *MAWK, mawk_vio_t *vf) +{ + /* fifos are always closed when both awk and the app closes them 
and + there are no inherited pipes anyway */ +} + +const mawk_vio_imp_t mawk_vio_fifo_imp = { + mawk_vio_fifo_putc, + mawk_vio_fifo_write_str, + mawk_vio_fifo_write, + mawk_vio_fifo_printf, + mawk_vio_fifo_read, + mawk_vio_fifo_close, + mawk_vio_fifo_flush, + mawk_vio_fifo_error, + mawk_vio_fifo_mark_no_close +}; diff --git a/src/libmawk/vio_fifo.h b/src/libmawk/vio_fifo.h new file mode 100644 index 0000000..f170c22 --- /dev/null +++ b/src/libmawk/vio_fifo.h @@ -0,0 +1,30 @@ +#ifndef MAWK_VIO_FIFO_H +#define MAWK_VIO_FIFO_H + +#include "mawk.h" +#include "vio.h" +#include "zfifo.h" + +typedef struct mawk_vio_fifo_s { + mawk_vio_t vio_common_head; + mawk_zfifo_t fifo; + int is_awk2app; /* 1 if pipe is awk->app */ + int is_stdout; /* 1 if pipe is an stdout */ + int eof_from_awk; /* 1 if there won't be more from awk or awk won't accept more data (close()) */ + int eof_from_app; /* 1 if there won't be more from the app or the app won't accept more data */ +} mawk_vio_fifo_t; + +const mawk_vio_imp_t mawk_vio_fifo_imp; + +mawk_vio_t *mawk_vio_fifo_open(mawk_state_t *MAWK, const char *name, mawk_vio_open_mode_t mode); + + +int mawk_vio_fifo_eof_from_app(mawk_state_t *MAWK, mawk_vio_t *vf); + +/* app writes an input buffer */ +int mawk_vio_fifo_write_app(mawk_state_t *MAWK, mawk_vio_t *vf, const char *data, int len); + +/* app reads an output buffer */ +int mawk_vio_fifo_read_app(mawk_state_t *MAWK, mawk_vio_t *vf, char *dst, long int size); + +#endif diff --git a/src/libmawk/vio_orig.c b/src/libmawk/vio_orig.c new file mode 100644 index 0000000..9250f4c --- /dev/null +++ b/src/libmawk/vio_orig.c @@ -0,0 +1,451 @@ +/******************************************** +vio_orig.c - virtual IO: original mawk file/pipe IO implementation + +libmawk changes (C) 2009-2013, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991-94. Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. 
+ +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ +#include "conf.h" +#include +#include +#include +#include +#include +#include "mawk.h" +#include "files.h" +#include "memory.h" +#include "fin.h" +#include "field.h" +#include "vio.h" +#include "array_environ.h" + +/* The original mawk file IO layer uses FILE * for output (because fprintf() + and fflush() works on them) and int fd for input (probably for short reads + and manual buffering). Pipes (background processes) also need a pid for + wait(). In some cases stdin was read by fgets(). + + The vio reimplementation follows the same conventions, defining mawk_vio_t + to contain a FILE *, an int fd and a pid. For output FILE * is required, + input is done exclusively using int fd. +*/ + +#ifdef V7 +#include /* defines FIOCLEX */ +#endif +#ifndef NO_FCNTL_H + +#include +#define CLOSE_ON_EXEC(fd) fcntl(fd, F_SETFD, 1) + +#else +#define CLOSE_ON_EXEC(fd) ioctl(fd, FIOCLEX, (PTR) 0) +#endif + +typedef struct mawk_vio_orig_s { + mawk_vio_t vio_common_head; + pid_t pid; /* we need to wait() when we close a pipe */ + FILE *f; + int fd; + int at_eof; + int no_close; +} mawk_vio_orig_t; + + +const mawk_vio_imp_t mawk_vio_orig_imp; + +static mawk_vio_orig_t *vio_alloc_(mawk_state_t *MAWK) +{ + mawk_vio_orig_t *vf; + vf = mawk_zmalloc(MAWK, sizeof(mawk_vio_orig_t)); + vf->vio_common_head.imp = &mawk_vio_orig_imp; + vf->vio_common_head.refco = 0; + vf->pid = -1; + vf->fd = -1; + vf->f = NULL; + vf->at_eof = 0; + vf->no_close = 0; + return vf; +} + +int mawk_vio_orig_close(mawk_state_t *MAWK, mawk_vio_t *vf_) +{ + mawk_vio_orig_t *vf = (mawk_vio_orig_t *)vf_; + + if (vf->f != NULL) { + if (!vf->no_close) { + fclose(vf->f); + vf->f = NULL; + } + } + if (vf->fd >= 0) { + if (!vf->no_close) { + close(vf->fd); + vf->fd = -1; + } + } +#ifndef MAWK_NO_FORK + if (vf->pid > 0) { + if (!vf->no_close) { + mawk_wait_for(MAWK, 
vf->pid); + vf->pid = -1; + } + } +#endif + mawk_zfree(MAWK, vf, sizeof(mawk_vio_orig_t)); + return 0; +} + +int mawk_vio_orig_flush(mawk_state_t *MAWK, mawk_vio_t *vf_) +{ + mawk_vio_orig_t *vf = (mawk_vio_orig_t *)vf_; + if (vf->f != NULL) { + if (fflush(vf->f) < 0) { + mawk_errmsg(MAWK, errno, "unexpected write error"); + mawk_exitval(MAWK, 2, -1); + } + } + return 0; +} + +static void bufsetup(FILE *f) +{ + if (isatty(fileno(f))) + setbuf(f, (char *) 0); +} + +/* fopen() but no buffering to ttys */ +static FILE *tfopen(const char *name, const char *mode) +{ + FILE *retval = fopen(name, mode); + if (retval) + bufsetup(retval); + + return retval; +} + +static mawk_vio_t *mawk_vio_orig_fdopen(mawk_state_t *MAWK, int fd, mawk_vio_open_mode_t mode) +{ + mawk_vio_orig_t *vf; + FILE *f; + + switch(mode) { + case MAWK_VIO_O_TRUNC: + f = fdopen(fd, "w"); + if (f == NULL) + return NULL; + bufsetup(f); + break; + case MAWK_VIO_O_APPEND: + f = fdopen(fd, "a"); + if (f == NULL) + return NULL; + bufsetup(f); + break; + case MAWK_VIO_I: + f = NULL; + break; + } + + vf = vio_alloc_(MAWK); + vf->f = f; + vf->fd = fd; + return (mawk_vio_t *)vf; +} + + + + +#warning TODO print runtime warnings +mawk_vio_t *mawk_vio_orig_open(mawk_state_t *MAWK, const char *name, mawk_vio_open_mode_t mode) +{ + mawk_vio_orig_t *vf; + FILE *f; + int fd = -1; + + if (((name[0] == '-') && (name[1] == 0)) || (strcmp(name, "/dev/stdin") == 0)) + return mawk_vio_orig_fdopen(MAWK, 0, mode); + + /* /dev/fd/xxx should be fdopen */ + if (strncmp(name, "/dev/fd/", 8) == 0) { + int fd; + char *end; + fd = strtol(name+8, &end, 10); + if (*end == '\0') + return mawk_vio_orig_fdopen(MAWK, fd, mode); + } + + switch(mode) { + case MAWK_VIO_O_TRUNC: + f = tfopen(name, "w"); + if (f == NULL) + return NULL; + break; + case MAWK_VIO_O_APPEND: + f = tfopen(name, "a"); + if (f == NULL) + return NULL; + break; + case MAWK_VIO_I: + f = NULL; + fd = open(name, O_RDONLY); + if (fd < 0) + return NULL; + } + + vf = 
vio_alloc_(MAWK); + vf->f = f; + vf->fd = fd; + return (mawk_vio_t *)vf; +} + +#ifdef MAWK_NO_FORK +mawk_vio_t *mawk_vio_orig_open_pipe(mawk_state_t *MAWK, const char *name, int type) +{ + if (!MAWK->do_exit) + mawk_rt_error(MAWK, "process execution not supported"); + return NULL; +} + +void mawk_vio_orig_exec_shell(mawk_state_t *MAWK, const char *cmd) +{ + if (!MAWK->do_exit) + mawk_rt_error(MAWK, "process execution not supported"); +} +#else + +void mawk_vio_orig_exec_shell(mawk_state_t *MAWK, const char *cmd) +{ + char **envp = mawk_environ_extract(MAWK); + + if (envp == NULL) { + mawk_errmsg(MAWK, errno, "failed to exec %s -c %s: can't set up environ[]", MAWK->shell, cmd); + fflush(stderr); + _exit(128); + } + execle(MAWK->shell, MAWK->shell, "-c", cmd, (char *) 0, envp); + mawk_errmsg(MAWK, errno, "failed to exec %s -c %s", MAWK->shell, cmd); + fflush(stderr); + _exit(128); +} + +#ifdef MAWK_NO_PIPE +mawk_vio_t *mawk_vio_orig_open_pipe(mawk_state_t *MAWK, const char *name, int type) +{ + if (!MAWK->do_exit) + mawk_rt_error(MAWK, "pipes (and background processes) not supported"); + return NULL; +} +#else + +mawk_vio_t *mawk_vio_orig_open_pipe(mawk_state_t *MAWK, const char *name, int type) +{ + int the_pipe[2], local_fd, remote_fd; + mawk_vio_orig_t *vf; + pid_t pid; + FILE *f; + + if (pipe(the_pipe) < 0) + return NULL; + + local_fd = the_pipe[type == PIPE_OUT]; + remote_fd = the_pipe[type == PIPE_IN]; + + /* output files shall have a FILE * */ + if (type == PIPE_OUT) { + f = fdopen(local_fd, "w"); + if (f == NULL) { + close(local_fd); + close(remote_fd); + return NULL; + } + } + + /* to keep output ordered correctly */ + fflush(stdout); + fflush(stderr); + + pid = fork(); + switch (pid) { + case -1: + close(local_fd); + close(remote_fd); + return NULL; + case 0: + close(local_fd); + close(type == PIPE_IN); + dup(remote_fd); + close(remote_fd); + mawk_vio_exec_shell(MAWK, name); + default: + close(remote_fd); + /* we could deadlock if future child inherit the 
local fd , + set close on exec flag */ +#warning TODO: better do this by hand - close on exec is not really portable + CLOSE_ON_EXEC(local_fd); + break; + } + + vf = vio_alloc_(MAWK); + vf->pid = pid; + + /* for an output pipe store FILE * for consistent writes */ + if (type == PIPE_OUT) + vf->f = f; + else + vf->fd = local_fd; + + return (mawk_vio_t *)vf; +} +#endif +#endif + +int mawk_vio_orig_putc(mawk_state_t *MAWK, mawk_vio_t *vf_, char c) +{ + mawk_vio_orig_t *vf = (mawk_vio_orig_t *)vf_; + if (vf->f == NULL) + return -1; + if (fputc(c, vf->f) != c) + return -1; + return 0; +} + +int mawk_vio_orig_write(mawk_state_t *MAWK, mawk_vio_t *vf_, const char *data, int len) +{ + mawk_vio_orig_t *vf = (mawk_vio_orig_t *)vf_; + if (vf->f == NULL) + return -1; + if (fwrite(data, 1, len, vf->f) == 1) + return 0; + return -1; +} + +int mawk_vio_orig_write_str(mawk_state_t *MAWK, mawk_vio_t *vf_, const char *str) +{ + mawk_vio_orig_t *vf = (mawk_vio_orig_t *)vf_; + if (vf->f == NULL) + return -1; + return fprintf(vf->f, "%s", str); +} + +int mawk_vio_orig_printf(mawk_state_t *MAWK, mawk_vio_t *vf_, const char *fmt, ...) 
+{ + mawk_vio_orig_t *vf = (mawk_vio_orig_t *)vf_; + int len; + va_list ap; + + if (vf->f == NULL) + return -1; + + va_start(ap, fmt); + len = vfprintf(vf->f, fmt, ap); + va_end(ap); + return len; +} + +int mawk_vio_orig_error(mawk_state_t *MAWK, mawk_vio_t *vf_) +{ + mawk_vio_orig_t *vf = (mawk_vio_orig_t *)vf_; + if (vf->f == NULL) + return -1; + return ferror(vf->f); +} + +int mawk_vio_orig_setbuf(mawk_state_t *MAWK, mawk_vio_t *vf_, int buf_enable) +{ + mawk_vio_orig_t *vf = (mawk_vio_orig_t *)vf_; + if (vf->f == NULL) + return -1; + + if (buf_enable) + setbuf(vf->f, ""); + else + setbuf(vf->f, NULL); + + return 0; +} + +int mawk_vio_orig_read(mawk_state_t *MAWK, mawk_vio_t *vf_, char *dst, long int size) +{ + mawk_vio_orig_t *vf = (mawk_vio_orig_t *)vf_; + int len; + + /* input file must not have a FILE * */ + if (vf->f != NULL) + return -1; + + /* already at eof, don't attempt to read any more */ + if (vf->at_eof) + return 0; + + /* invalid fd, how did we get here? */ + if (vf->fd < 0) + return -1; + + len = read(vf->fd, dst, size); + if (len <= 0) + vf->at_eof = 1; + return len; +} + +void mawk_vio_orig_mark_no_close(mawk_state_t *MAWK, mawk_vio_t *vf_) +{ + mawk_vio_orig_t *vf = (mawk_vio_orig_t *)vf_; + vf->no_close = 1; +} + +const mawk_vio_imp_t mawk_vio_orig_imp = { + mawk_vio_orig_putc, + mawk_vio_orig_write_str, + mawk_vio_orig_write, + mawk_vio_orig_printf, + mawk_vio_orig_read, + mawk_vio_orig_close, + mawk_vio_orig_flush, + mawk_vio_orig_error, + mawk_vio_orig_mark_no_close +}; + +const mawk_vio_init_t mawk_vio_orig_init = { + mawk_vio_orig_open, + mawk_vio_orig_open_pipe, + mawk_vio_orig_exec_shell, + 1 +}; + + +void mawk_vio_orig_setup_stdio(mawk_state_t * MAWK, int enable_stdin, int enable_stdout, int enable_stderr) +{ + mawk_vio_orig_t *vf; + + if (enable_stdin) { + vf = (mawk_vio_orig_t *)mawk_vio_orig_open(MAWK, "/dev/stdin", MAWK_VIO_I); + mawk_file_register(MAWK, "/dev/stdin", F_IN, (mawk_vio_t *)vf); + mawk_vio_orig_mark_no_close(MAWK, 
(mawk_vio_t *)vf); + } + + if (enable_stdout) { + vf = vio_alloc_(MAWK); + vf->f = stdout; + mawk_file_register_nofin(MAWK, "/dev/stdout", F_TRUNC, (mawk_vio_t *)vf); + mawk_vio_orig_mark_no_close(MAWK, (mawk_vio_t *)vf); + /* don't buffer stdout in interactive mode */ + if (MAWK->interactive_flag) + mawk_vio_orig_setbuf(MAWK, (mawk_vio_t *)vf, 0); + } + + if (enable_stderr) { + vf = vio_alloc_(MAWK); + vf->f = stderr; + mawk_file_register_nofin(MAWK, "/dev/stderr", F_TRUNC, (mawk_vio_t *)vf); + mawk_vio_orig_mark_no_close(MAWK, (mawk_vio_t *)vf); + /* never buffer stderr */ + mawk_vio_orig_setbuf(MAWK, (mawk_vio_t *)vf, 0); + } +} diff --git a/src/libmawk/vio_orig.h b/src/libmawk/vio_orig.h new file mode 100644 index 0000000..2c7ebfc --- /dev/null +++ b/src/libmawk/vio_orig.h @@ -0,0 +1,10 @@ +extern const mawk_vio_imp_t mawk_vio_orig_imp; +extern const mawk_vio_init_t mawk_vio_orig_init; + +/* set up /dev/stdin, /dev/stdout and /dev/stderr (depending on which ones + are enabled) in the original way (stdin to fd0, stdout and stderr to the + FILE * variants*/ +void mawk_vio_orig_setup_stdio(mawk_state_t * MAWK, int enable_stdin, int enable_stdout, int enable_stderr); + +/* manually enable/disable buffering of a vio_orig */ +int mawk_vio_orig_setbuf(mawk_state_t *MAWK, mawk_vio_t *vf_, int buf_enable); diff --git a/src/libmawk/viohack.c b/src/libmawk/viohack.c new file mode 100644 index 0000000..4e06e7c --- /dev/null +++ b/src/libmawk/viohack.c @@ -0,0 +1,22 @@ +/******************************************** +libmawk (C) 2014, Tibor 'Igor2' Palinkas; + +This is a source file for libmawk, an implementation of +the AWK programming language, fork of mawk. + +Libmawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +#include "mawk.h" +#include "field.h" + +extern int isatty(int); + +void mawk_detect_interactive(mawk_state_t *MAWK) +{ + if (isatty(0) && MAWK->rs_shadow.type == SEP_CHAR && MAWK->rs_shadow.c == '\n') { + /* interactive line buffered mode */ + MAWK->interactive_flag = 1; + } +} diff --git a/src/libmawk/viohack.h b/src/libmawk/viohack.h new file mode 100644 index 0000000..71f88be --- /dev/null +++ b/src/libmawk/viohack.h @@ -0,0 +1,6 @@ +/* functions which are somewhat hackish and won't fit in the vio paradigm but + are required in the lmawk executable */ + +/* check whether we need to be interactive and chaneg the interactive flag + accordingly (isatty() and other heuristics) */ +void mawk_detect_interactive(mawk_state_t *MAWK); diff --git a/src/libmawk/zfifo.c b/src/libmawk/zfifo.c new file mode 100644 index 0000000..e6dd22f --- /dev/null +++ b/src/libmawk/zfifo.c @@ -0,0 +1,145 @@ +/******************************************** +libmawk (C) 2009-2014, Tibor 'Igor2' Palinkas; + +This is a source file for libmawk, an implementation of +the AWK programming language, fork of mawk. + +Libmawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +#include +#include "mawk.h" +#include "zfifo.h" +#include "zmalloc.h" + +mawk_zfifo_t *mawk_zfifo_alloc(mawk_state_t *MAWK, mawk_zfifo_t *sfifo, int max_size) +{ + if (sfifo == NULL) { + sfifo = mawk_zmalloc(MAWK, sizeof(mawk_zfifo_t)); + sfifo->zalloced = 1; + } + else + sfifo->zalloced = 0; + + sfifo->size = 0; + sfifo->max_size = max_size; + sfifo->stage_used = 0; + sfifo->head = NULL; + sfifo->tail = NULL; + return sfifo; +} + +void mawk_zfifo_free(mawk_state_t *MAWK, mawk_zfifo_t *fifo) +{ + mawk_zfifo_block_t *b, *next; + for(b = fifo->head; b != NULL; b = next) { + next = b->next; + mawk_zfree(MAWK, b, sizeof(mawk_zfifo_block_t)); + } + if (fifo->zalloced) + mawk_zfree(MAWK, fifo, sizeof(mawk_zfifo_t)); +} + +int mawk_zfifo_write(mawk_state_t *MAWK, mawk_zfifo_t *fifo, const char *data, int size) +{ + mawk_zfifo_block_t *b; + + /* don't write more than we should */ + if ((fifo->max_size > 0) && (fifo->size + size > fifo->max_size)) + return 0; + + /* small write, append to stage buffer */ + if (fifo->stage_used + size < sizeof(fifo->stage_buf)) { + memcpy(fifo->stage_buf + fifo->stage_used, data, size); + fifo->stage_used += size; + fifo->size += size; + return size; + } + + /* can't stage, alloc a new block at the end of the list and append + the stage _and_ new data there */ + b = mawk_zmalloc(MAWK, sizeof(mawk_zfifo_block_t) + size + fifo->stage_used); + b->size = size + fifo->stage_used; + memcpy(b->buf, fifo->stage_buf, fifo->stage_used); + memcpy(b->buf + fifo->stage_used, data, size); + fifo->stage_used = 0; + fifo->size += size; + b->next = NULL; + b->readp = 0; + + /* append the block to the list */ + if (fifo->tail == NULL) { + fifo->head = b; + fifo->tail = b; + } + else { + fifo->tail->next = b; + fifo->tail = b; + } + + return size; +} + +int mawk_zfifo_read(mawk_state_t *MAWK, mawk_zfifo_t *fifo, char *data, int size) +{ + mawk_zfifo_block_t *b, *next; + char *end = data; + int left = size; + 
+#define append(from, asize) \ + do { \ + int append_asize = asize; \ + memcpy(end, from, append_asize); \ + end += append_asize; \ + left -= append_asize; \ + fifo->size -= append_asize; \ + } while(0) + + /* start appending blocks from head */ + for(b = fifo->head; b != NULL; b = next) { + next = b->next; + if ((b->size - b->readp) < left) { + /* whole block append */ + append(b->buf + b->readp, b->size - b->readp); + mawk_zfree(MAWK, b, sizeof(mawk_zfifo_block_t)); + fifo->head = next; + } + else { + int old_left; + /* partial append */ + old_left = left; + append(b->buf+b->readp, left); + b->readp += old_left; + break; /* don't even look at further blocks, we are full! */ + } + } + + /* removed all blocks */ + if (fifo->head == NULL) + fifo->tail = NULL; + + /* if there's nothing else left, read from the staging buff */ + if (fifo->stage_used > 0) { + int amount, stage_left; + + /* can we copy the whole buff? */ + if (fifo->stage_used < left) { + amount = fifo->stage_used; + stage_left = 0; + } + else { + /* the rare case when the end of a read cuts the staging buff in half */ + amount = left; + stage_left = fifo->stage_used - amount; + } + append(fifo->stage_buf, amount); + if (stage_left > 0) + memmove(fifo->stage_buf, fifo->stage_buf + amount, stage_left); + fifo->stage_used = stage_left; + } + + return end - data; +} + + diff --git a/src/libmawk/zfifo.h b/src/libmawk/zfifo.h new file mode 100644 index 0000000..98d75f4 --- /dev/null +++ b/src/libmawk/zfifo.h @@ -0,0 +1,41 @@ +#include "mawk.h" + +#define MAWK_ZFIFO_STAGE 64 + +/* zmalloc based binary fifo; a linked list of zmalloc'd blocks of writes. 
+ Small writes are cached until they reach MAWK_ZFIFO_STAGE to avoid a lot + of blocks when writing the fifo char-by-char */ + +typedef struct mawk_zfifo_block_s mawk_zfifo_block_t; + +struct mawk_zfifo_block_s { + mawk_zfifo_block_t *next; /* next block to jump to when this one is exhausted */ + int size; /* number of data bytes stored in buf */ + int readp; /* next character to read from this buffer */ + char buf[1]; /* start of the buffer; the block is allocated with room for size bytes */ +}; + +typedef struct { + int size, max_size; /* current and maximum size; max size is in data bytes held by the buffer, there's an overhead of allocation; max_size <= 0 (e.g. -1) means no limit */ + char stage_buf[MAWK_ZFIFO_STAGE]; /* staging: buffer small writes to avoid very small zmalloc(); this also assumes read happens in bigger chunks so stage buffer is most likely read at once */ + int stage_used; /* number of bytes currently staged in stage_buf */ + mawk_zfifo_block_t *head, *tail; /* singly linked list of blocks with head and tail */ + int zalloced; /* whether the fifo was zalloc()'d */ +} mawk_zfifo_t; + +/* set up fifo; if sfifo is NULL, the fifo is zalloc()'d, else all fields + are reset */ +mawk_zfifo_t *mawk_zfifo_alloc(mawk_state_t *MAWK, mawk_zfifo_t *sfifo, int max_size); + +/* discard all data and free the buffer; if it was auto-zalloc()'d + in mawk_zfifo_alloc(), fifo is zfree()d here */ +void mawk_zfifo_free(mawk_state_t *MAWK, mawk_zfifo_t *fifo); + +/* append data to the end of a fifo; return size written + (no short write: if max_size prevents the write, no bytes are appended and 0 is returned) */ +int mawk_zfifo_write(mawk_state_t *MAWK, mawk_zfifo_t *fifo, const char *data, int size); + +/* pop data from the beginning of the fifo; return size read + (short read: when there's not enough data in the fifo) */ +int mawk_zfifo_read(mawk_state_t *MAWK, mawk_zfifo_t *fifo, char *data, int size); + diff --git a/src/libmawk/zmalloc.c b/src/libmawk/zmalloc.c new file mode 100644 index 0000000..7f99e0d --- /dev/null +++ b/src/libmawk/zmalloc.c @@ -0,0 +1,181 @@ + +/******************************************** 
+zmalloc.c + +libmawk changes (C) 2009-2012, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#include "mawk.h" +#include "zmalloc.h" +#include "memory.h" + +/* + mawk_zmalloc() gets mem from malloc() in CHUNKS of 2048 bytes + and cuts these blocks into smaller pieces that are multiples + of eight bytes. When a piece is returned via mawk_zfree(), it goes + on a linked linear list indexed by its size. The lists are + an array, pool[]. + + E.g., if you ask for 22 bytes with p = mawk_zmalloc(22), you actually get + a piece of size 24. When you free it with mawk_zfree(p,22) , it is added + to the list at pool[2]. + + + In more detail: + When a block-allocated area is free'd, put it in pool[blocksize], which + is a linked list. When a new request comes in, try to serve it from the + pool. + + If that fails: there is a single chunk allocated, called MAWK->avail, + with a counter (MAWK->amt_avail) of the unclaimed blocks left in it; cut + down the requested size from there, from the unclaimed end of the ->avail chunk. If + it is impossible (not enough unclaimed blocks at the end), put the unclaimed + part of ->avail in the pool and open a new ->avail chunk. + + For corner cases see comments there. 
+*/ + +#define CHUNK 256 + /* number of blocks to get from malloc */ + +static void out_of_mem(mawk_state_t * MAWK); + +/* Report an out-of-memory condition: a runtime error while MAWK->mawk_state is EXECUTION, otherwise a compile error followed by mawk_exit(MAWK, 2). */ +static void out_of_mem(mawk_state_t * MAWK) +{ + static const char out[] = "out of memory"; + + if (MAWK->mawk_state == EXECUTION) + mawk_rt_error(MAWK, out); + else { + /* I don't think this will ever happen */ + mawk_compile_error(MAWK, out); + mawk_exit(MAWK, 2); + } +} + +/* mawk_zmalloc() is a macro in front of mawk_bmalloc "BLOCK malloc"; blocks is the request size in ZBLOCK units (converted to bytes with << ZSHIFT below). NOTE(review): blocks == 0 would index pool[blocks - 1] with a wrapped unsigned value - presumably callers never request 0 bytes; confirm. */ +PTR mawk_bmalloc(mawk_state_t *MAWK, register unsigned blocks) +{ + register ZBLOCK *p; + + if (blocks > POOLSZ) { + /* larger than pooled allocations */ + p = (ZBLOCK *) mawk_malloc(MAWK, blocks << ZSHIFT); + if (!p) + out_of_mem(MAWK); + return (PTR) p; + } + + if ((p = MAWK->pool[blocks - 1])) { + /* remove the first item of the linked list and return it */ + MAWK->pool[blocks - 1] = p->link; + return (PTR) p; + } + + if (blocks > MAWK->amt_avail) { + if (MAWK->amt_avail != 0) { + /* block is bigger than what's available in the last chunk + mark the remaining of the last chunk free and put it in the + corresponding pool. We can do this because blocks is small + enough to be pooled and unclaimed area is even smaller + so there is a pool for it for sure. */ + + MAWK->avail->link = MAWK->pool[--MAWK->amt_avail]; + MAWK->pool[MAWK->amt_avail] = MAWK->avail; + } + + /* no unclaimed memory is available in the last chunk, alloc a new chunk */ + + if ((MAWK->avail = (ZBLOCK *) mawk_malloc(MAWK, CHUNK * ZBLOCKSZ)) == NULL) { + /* if we get here, almost out of memory - couldn't allocate a whole + new chunk; try to allocate the current request out-of-pool, as the + request may be smaller than a chunk + + WARNING: this means pool[] contains not only allocated-in-chunk blocks + but plain mallocs as well. 
+ + */ + MAWK->amt_avail = 0; + p = mawk_malloc(MAWK, blocks << ZSHIFT); + if (!p) + out_of_mem(MAWK); + return (PTR) p; + } + else { + /* we have a new chunk to play with */ + MAWK->amt_avail = CHUNK; + } + } + + /* get p from the start of the unclaimed area of the avail chunk - by now we made sure + we have enough unclaimed blocks at the end */ + p = MAWK->avail; + MAWK->avail += blocks; + MAWK->amt_avail -= blocks; + return (PTR) p; +} +/* Give back an allocation of the given block count: out-of-pool sizes (blocks > POOLSZ) go to mawk_free(), everything else is pushed on the front of pool[blocks - 1]. */ +void mawk_bfree(mawk_state_t *MAWK, register PTR p, register unsigned blocks) +{ + if (blocks > POOLSZ) + mawk_free(MAWK, p); + else { + ((ZBLOCK *) p)->link = MAWK->pool[--blocks]; + MAWK->pool[blocks] = (ZBLOCK *) p; + } +} +/* Resize an allocation; old_size and new_size are in bytes. mawk_realloc() is used only when both sizes are above the pooled range; otherwise a new area is zmalloc'd, the smaller of old_size and new_size bytes are copied over and the old area is zfree'd (the copy and free are skipped when p is NULL). */ +PTR mawk_zrealloc(mawk_state_t *MAWK, register PTR p, unsigned old_size, unsigned new_size) +{ + register PTR q; + + if (new_size > (POOLSZ << ZSHIFT) && old_size > (POOLSZ << ZSHIFT)) { + /* was not a pool allocation, just realloc */ + if (!(q = mawk_realloc(MAWK, p, new_size))) + out_of_mem(MAWK); + } + else { + /* pool allocation: zalloc new, zfree old */ + q = mawk_zmalloc(MAWK, new_size); + if (p != NULL) { + memcpy(q, p, old_size < new_size ? old_size : new_size); + mawk_zfree(MAWK, p, old_size); + } + } + return q; +} +/* Return a zmalloc'd copy of s including its terminating NUL, or NULL when s is NULL. */ +char *mawk_zstrclone(mawk_state_t *MAWK, const char *s) +{ + int l; + char *ret; + + if (s == NULL) + return NULL; + + l = strlen(s); + ret = mawk_zmalloc(MAWK, l+1); + memcpy(ret, s, l+1); + return ret; +} + + +#ifndef __GNUC__ +/* pacifier for Bison , this is really dead code */ +PTR alloca(unsigned sz) +{ + /* hell just froze over */ + exit(100); + return (PTR) 0; +} +#endif diff --git a/src/libmawk/zmalloc.h b/src/libmawk/zmalloc.h new file mode 100644 index 0000000..68763c3 --- /dev/null +++ b/src/libmawk/zmalloc.h @@ -0,0 +1,34 @@ + +/******************************************** +zmalloc.h + +libmawk changes (C) 2009-2010, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. 
Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. +********************************************/ + +#ifndef ZMALLOC_H +#define ZMALLOC_H + +#include +/* block-count based allocator entry points; mawk_zrealloc() takes byte sizes */ +PTR mawk_bmalloc(mawk_state_t *, unsigned); +void mawk_bfree(mawk_state_t *, PTR, unsigned); +PTR mawk_zrealloc(mawk_state_t *MAWK, register PTR p, unsigned old_size, unsigned new_size); +char *mawk_zstrclone(mawk_state_t *, const char *s); +/* byte-sized front ends: the byte count is rounded up to whole blocks (assuming ZBLOCKSZ == 1 << ZSHIFT - TODO confirm); size is parenthesized so the cast applies to the whole argument expression. Pass mawk_zfree() the same size that was passed to mawk_zmalloc(). */ +#define mawk_zmalloc(MAWK, size) mawk_bmalloc(MAWK, (((unsigned)(size))+ZBLOCKSZ-1)>>ZSHIFT) +#define mawk_zfree(MAWK, p,size) mawk_bfree(MAWK, p,(((unsigned)(size))+ZBLOCKSZ-1)>>ZSHIFT) +/* typed convenience wrappers: the size is taken from the type, or from *(p) when freeing */ +#define MAWK_ZMALLOC(MAWK, type) ((type*)mawk_zmalloc(MAWK, sizeof(type))) +#define MAWK_ZFREE(MAWK, p) mawk_zfree(MAWK, p, sizeof(*(p))) + + +#endif /* ZMALLOC_H */ diff --git a/src/libmawk/zmalloc_native.c b/src/libmawk/zmalloc_native.c new file mode 100644 index 0000000..b15e5bf --- /dev/null +++ b/src/libmawk/zmalloc_native.c @@ -0,0 +1,82 @@ + +/******************************************** +zmalloc_native.c + +libmawk changes (C) 2009-2012, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +#include "mawk.h" +#include "zmalloc.h" +#include "memory.h" +#include "nstd.h" + +/* zmalloc alternative using native malloc/realloc/free; block counts are converted to bytes and nothing is pooled */ + +static void out_of_mem(mawk_state_t * MAWK) +{ + static const char out[] = "out of memory"; + + if (MAWK->mawk_state == EXECUTION) + mawk_rt_error(MAWK, out); + else { + /* I don't think this will ever happen */ + mawk_compile_error(MAWK, out); + mawk_exit(MAWK, 2); + } +} + +/* mawk_zmalloc() is a macro in front of mawk_bmalloc "BLOCK malloc"; returns the area obtained from malloc(), after calling out_of_mem() if that was NULL */ +PTR mawk_bmalloc(mawk_state_t *MAWK, register unsigned blocks) +{ + PTR q = malloc(blocks << ZSHIFT); + if (q == NULL) + out_of_mem(MAWK); + return q; +} + +void mawk_bfree(mawk_state_t *MAWK, register PTR p, register unsigned blocks) +{ + free(p); /* free() does not need the block count */ +} +/* Resize p to new_size bytes with realloc() and return the new area; old_size is unused here. out_of_mem() is called when realloc() returns NULL. */ +PTR mawk_zrealloc(mawk_state_t *MAWK, register PTR p, unsigned old_size, unsigned new_size) +{ + PTR q = realloc(p, new_size); + if (q == NULL) + out_of_mem(MAWK); + return q; +} +/* Return an allocated copy of s including its terminating NUL, or NULL when s is NULL. */ +char *mawk_zstrclone(mawk_state_t *MAWK, const char *s) +{ + int l; + char *ret; + + if (s == NULL) + return NULL; + + l = strlen(s); + ret = mawk_zmalloc(MAWK, l+1); + memcpy(ret, s, l+1); + return ret; +} + + +#ifndef __GNUC__ +/* pacifier for Bison , this is really dead code */ +PTR alloca(unsigned sz) +{ + /* hell just froze over */ + exit(100); + return (PTR) 0; +} +#endif diff --git a/src/libmawk/zmalloc_safemalloc.c b/src/libmawk/zmalloc_safemalloc.c new file mode 100644 index 0000000..6a521d2 --- /dev/null +++ b/src/libmawk/zmalloc_safemalloc.c @@ -0,0 +1,82 @@ + +/******************************************** +zmalloc_safemalloc.c + +libmawk changes (C) 2009-2012, Tibor 'Igor2' Palinkas; +based on mawk code coming with the below copyright: + +copyright 1991, Michael D. Brennan + +This is a source file for mawk, an implementation of +the AWK programming language. + +Mawk is distributed without warranty under the terms of +the GNU General Public License, version 2, 1991. 
+********************************************/ + +#include "mawk.h" +#include "zmalloc.h" +#include "memory.h" +#include "nstd.h" + +/* zmalloc alternative using mawk_malloc/mawk_realloc/mawk_free; block counts are converted to bytes and nothing is pooled */ + +static void out_of_mem(mawk_state_t * MAWK) +{ + static const char out[] = "out of memory"; + + if (MAWK->mawk_state == EXECUTION) + mawk_rt_error(MAWK, out); + else { + /* I don't think this will ever happen */ + mawk_compile_error(MAWK, out); + mawk_exit(MAWK, 2); + } +} + +/* mawk_zmalloc() is a macro in front of mawk_bmalloc "BLOCK malloc"; returns the area obtained from mawk_malloc(), after calling out_of_mem() if that was NULL */ +PTR mawk_bmalloc(mawk_state_t *MAWK, register unsigned blocks) +{ + PTR q = mawk_malloc(MAWK, blocks << ZSHIFT); + if (q == NULL) + out_of_mem(MAWK); + return q; +} + +void mawk_bfree(mawk_state_t *MAWK, register PTR p, register unsigned blocks) +{ + mawk_free(MAWK, p); /* the block count is not passed on */ +} +/* Resize p to new_size bytes with mawk_realloc() and return the new area; old_size is unused here. out_of_mem() is called when mawk_realloc() returns NULL. */ +PTR mawk_zrealloc(mawk_state_t *MAWK, register PTR p, unsigned old_size, unsigned new_size) +{ + PTR q = mawk_realloc(MAWK, p, new_size); + if (q == NULL) + out_of_mem(MAWK); + return q; +} +/* Return an allocated copy of s including its terminating NUL, or NULL when s is NULL. */ +char *mawk_zstrclone(mawk_state_t *MAWK, const char *s) +{ + int l; + char *ret; + + if (s == NULL) + return NULL; + + l = strlen(s); + ret = mawk_zmalloc(MAWK, l+1); + memcpy(ret, s, l+1); + return ret; +} + + +#ifndef __GNUC__ +/* pacifier for Bison , this is really dead code */ +PTR alloca(unsigned sz) +{ + /* hell just froze over */ + exit(100); + return (PTR) 0; +} +#endif diff --git a/src/libmawk.h b/src/libmawk.h new file mode 100644 index 0000000..88b64a5 --- /dev/null +++ b/src/libmawk.h @@ -0,0 +1 @@ +#include diff --git a/tools/chlog.sh b/tools/chlog.sh new file mode 100755 index 0000000..02a2c33 --- /dev/null +++ b/tools/chlog.sh @@ -0,0 +1,2 @@ +#!/bin/sh +svn log -r"$1:HEAD" | grep -E -v '^(-*$|r[0-9]+)' diff --git a/tools/chlog_sort.sh b/tools/chlog_sort.sh new file mode 100755 index 0000000..bcb4508 --- /dev/null +++ b/tools/chlog_sort.sh @@ -0,0 +1,34 @@ +#!/bin/sh + +# read the output of chlog.sh and split it per topic and save the lines +# into changelog-formatted 
files + +awk ' + BEGIN { + IGNORE["todo"]=1 + IGNORE["devlog"]=1 + IGNORE["bugreport"]=1 + IGNORE["blog_queue"]=1 + } + + ($1 ~ "^[[][^]]*[]]$") { + tag=tolower($1) + sub("[[]", "", tag) + sub("[]]", "", tag) + + if (tag in IGNORE) + next + + $1="" + line=$0 + if (!(tag in SEEN)) { + SEEN[tag]++ + } + print " [" tag "]" line > "CHG." tag + next + } + { + line=$0 + print line > "CHG.MISC" + } +' diff --git a/tools/find_globals.sh b/tools/find_globals.sh new file mode 100755 index 0000000..9d72011 --- /dev/null +++ b/tools/find_globals.sh @@ -0,0 +1,6 @@ +#!/bin/sh +# copy or link this file in trunk/src + +ctags ../src/libmawk/* > tags +grep " v" tags | grep -v "static const" > Global_variables + diff --git a/tools/proto.sh b/tools/proto.sh new file mode 100755 index 0000000..671cf32 --- /dev/null +++ b/tools/proto.sh @@ -0,0 +1,6 @@ +#!/bin/sh +for n in *.h *.c # unwrap PROTO(...) prototype macros in place; a file is only replaced when sed succeeded +do + sed "/PROTO(/ { s/PROTO(//; s/[,][ ]*[(]/(/; s/);[ ]*$/;/; }" < "$n" > "$n.tmp" && + mv "$n.tmp" "$n" +done diff --git a/tools/rename.sh b/tools/rename.sh new file mode 100755 index 0000000..76906ac --- /dev/null +++ b/tools/rename.sh @@ -0,0 +1,14 @@ +#!/bin/sh +: "${1:?usage: rename.sh SYMBOL}" # an empty symbol would turn the sed expressions below into match-everything patterns +for n in *.h *.c *.y +do + case "$n" in + parse.c) ;; + mawk.h);; + *) # add the mawk_ prefix to $1 at line start or after a character that is not a letter, _ or >; the .old copy is kept if sed fails + mv "$n" "$n.old" && + sed "s/\([^A-Za-z_>]\)$1/\1mawk_$1/g;s/^$1/mawk_$1/" < "$n.old" > "$n" && + rm -f "$n.old" + ;; + esac +done diff --git a/tools/sym b/tools/sym new file mode 100755 index 0000000..6513645 --- /dev/null +++ b/tools/sym @@ -0,0 +1,4 @@ +#!/bin/sh +# list public symbols without proper prefix (mawk_) in object files +nm "$@" | grep " T " | grep -v " mawk_" + diff --git a/tools/sym_valid.sh b/tools/sym_valid.sh new file mode 100755 index 0000000..646d210 --- /dev/null +++ b/tools/sym_valid.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +# symbol validation; this script lists globally visible symbols with missing +# prefix and persistent states (global vars) + +# ./globals.sh is in libporty work/c99scripts/globals.sh + +list_macros() +{ + awk -v "fn=$1" ' + /^[ \t]*#define[ \t]*/ { + name=$0 + sub("^[ 
\t]*#define[ \t]*", "", name) + sub("[ \t(].*", "", name) + print "macro", name, "(" fn ":" NR ")" + } + + ' < "$1" +} + +( +echo "" +echo "### missing prefix ###" + +(for n in *.c +do + echo "$n" >&2 + ./globals.sh -g -I../.. -I.. -DLMAWK_VER=\"1\" "$n" +done + +# list macros in the headers +for n in *.h +do + list_macros "$n" +done +) | awk '($2 ~ "^mawk_") || ($2 ~ "^Mawk_") || ($2 ~ "^libmawk_") || /CLASS extern/ || ($2 == "main") { next } { print $0 }' + +echo "" +echo "### persistent state ###" +for n in *.c +do + ./globals.sh -s -I../.. -I.. -DLMAWK_VER=\"1\" "$n" +done +) diff --git a/tools/unwarn.sh b/tools/unwarn.sh new file mode 100755 index 0000000..8031b5f --- /dev/null +++ b/tools/unwarn.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +# This file is placed in the Public Domain. + +# Comment all #warnings in a file given as $1. +# Useful on systems with CC with no support for #warning. + +sed ' + /^#[ \t]*warning.*/ { + s@^@/*@ + s@$@*/@ + } +' < "$1" > "$1.tmp" && mv "$1.tmp" "$1" diff --git a/tools/unwarn_all.sh b/tools/unwarn_all.sh new file mode 100755 index 0000000..8acea47 --- /dev/null +++ b/tools/unwarn_all.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +# This file is placed in the Public Domain. + +# Comment all #warnings in all .h and .c files, recursively. +# Useful on systems with CC with no support for #warning. +# The per-file script is found by removing _all from the path this script was started as. +action=$(echo "$0" | sed "s/_all//") + +find . -name '*.[ch]' -exec "$action" {} \;