Codebase list lua-rexlib / c3179aa
Update upstream source from tag 'upstream/2.9.1' Update to upstream version '2.9.1' with Debian dir e460ddf9b9596389929f06f1b6ffe02f076ae888 Sergei Golovan 2 years ago
40 changed file(s) with 1767 addition(s) and 373 deletion(s). Raw diff Collapse all Expand all
11 *~
22 *.so.*
33 *.so
4 *.src.rock
45 ChangeLog
56 *.zip
67 release-notes
00 License of Lrexlib release
11 --------------------------
22
3 Copyright (C) Reuben Thomas 2000-2012
4 Copyright (C) Shmuel Zeigerman 2004-2012
3 Copyright (C) Reuben Thomas 2000-2020
4 Copyright (C) Shmuel Zeigerman 2004-2020
55
66 Permission is hereby granted, free of charge, to any person
77 obtaining a copy of this software and associated
00 # Makefile for lrexlib
11
2 VERSION = 2.7.2
2 VERSION = 2.9.1
33 PROJECT = lrexlib
44 PROJECT_VERSIONED = $(PROJECT)-$(VERSION)
55
77 LUA = lua
88 LUAROCKS = luarocks
99 CP = cp -a
10 RM = rm
10 RM = rm -f
1111 RST2HTML = rst2html
12 REGNAMES = gnu pcre posix oniguruma tre
12 REGNAMES = gnu pcre pcre2 posix oniguruma tre
1313 LUAROCKS_COMMAND = make
1414
1515
3030
3131 rockspecs:
3232 rm -f *.rockspec
33 $(LUA) mkrockspecs.lua $(VERSION)
33 $(LUA) mkrockspecs.lua $(PROJECT) $(VERSION)
3434
3535 doc/index.txt: README.rst
3636 $(CP) $< $@
3737
3838 check: build
3939 for i in $(REGNAMES); do \
40 LUA_PATH="test/?.lua;$(LUA_PATH)" $(LUA) test/runtest.lua -dsrc/$$i $$i; \
40 LUA_PATH="test/?.lua;$(LUA_PATH);" $(LUA) test/runtest.lua -dsrc/$$i $$i; \
4141 done
4242
4343 clean:
4949 git tag -a -m "Release tag" rel-`echo $(VERSION) | sed -e 's/\./-/g'` && \
5050 git push && git push --tags && \
5151 $(MAKE) build LUAROCKS_COMMAND=build && \
52 woger lua package=$(PROJECT) package_name=$(PROJECT) version=$(VERSION) description="Lua binding for regex libraries" notes=release-notes home="https://github.com/rrthomas/$(PROJECT)"
52 woger lua package=$(PROJECT) package_name=$(PROJECT) version=$(VERSION) description="Lua binding for regex libraries" notes=release-notes home="`$(LUA) -e'version="'$(VERSION)'"; flavour="none"; t = require "rockspecs"; print(t.default.description.homepage)'`"
5353 rm -f release-notes
0 2020-08-07 Release 2.9.1
1
2 * Add Lua 5.4 support
3
4 2017-11-07 Release 2.9.0
5
6 * Add PCRE2 support.
7
8 2015-02-26 Release 2.8.0
9
10 * Add Lua 5.3 support
11 * No longer return empty matches adjacent to previous non-empty match.
12
013 2013-01-08 Release 2.7.2
114
215 * Fixed the use of alternative allocators, and a memory leak.
44 | and Shmuel Zeigerman (shmuz@013net.net)
55
66 **Lrexlib** provides bindings of six regular expression library APIs
7 (POSIX_, PCRE_, GNU_, TRE_ and Oniguruma_) to Lua_ 5.1 and Lua 5.2.
7 (POSIX_, PCRE_, PCRE2_, GNU_, TRE_ and Oniguruma_) to Lua_ >= 5.1.
88 The bindings for TRE and Oniguruma are not currently complete.
99
10 **Lrexlib** is copyright Reuben Thomas 2000-2012 and copyright Shmuel
11 Zeigerman 2004-2012, and is released under the same license as Lua,
10 **Lrexlib** is copyright Reuben Thomas 2000-2020 and copyright Shmuel
11 Zeigerman 2004-2020, and is released under the same license as Lua,
1212 the MIT_ license (otherwise known as the revised BSD license). There
1313 is no warranty.
1414
1515 .. _POSIX: http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html
1616 .. _PCRE: http://www.pcre.org/pcre.txt
17 .. _PCRE2: http://www.pcre.org/pcre2.txt
1718 .. _GNU: ftp://ftp.gnu.org/old-gnu/regex/
18 .. _Oniguruma: http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt
19 .. _Oniguruma: https://github.com/kkos/oniguruma
1920 .. _TRE: http://laurikari.net/tre/documentation/
2021 .. _Lua: http://www.lua.org
2122 .. _MIT: http://www.opensource.org/licenses/mit-license.php
3536
3637 luarocks install lrexlib-FLAVOUR
3738
38 where **FLAVOUR** is one of PCRE, POSIX, oniguruma, TRE, GNU
39 where **FLAVOUR** is one of PCRE, PCRE2, POSIX, oniguruma, TRE, GNU
3940
4041 .. _LuaRocks: http://www.luarocks.org
4142
55 <body>
66
77 <h2>Lrexlib</h2>
8 <p>Copyright &copy; Reuben Thomas 2000-2012<br>
9 Copyright &copy; Shmuel Zeigerman 2004-2012
8 <p>Copyright &copy; Reuben Thomas 2000-2020<br>
9 Copyright &copy; Shmuel Zeigerman 2004-2020
1010
1111 <p>Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
1212
77 ------------------------------------------------------------
88
99 **Lrexlib** builds into shared libraries called by default *rex_posix.so*,
10 *rex_pcre.so*, *rex_gnu.so*, *rex_tre.so* and *rex_onig.so*, which can be used with
11 *require*.
10 *rex_pcre.so*, *rex_pcre2.so*, *rex_gnu.so*, *rex_tre.so* and *rex_onig.so*,
11 which can be used with *require*.
1212
1313 ------------------------------------------------------------
1414
2727 MyFunc (arg1, arg2, [arg3], [arg4])
2828
2929 3. Throughout this document (unless it causes ambiguity), the identifier **rex**
30 is used in place of either *rex_posix*, *rex_pcre*, *rex_gnu*, *rex_onig* or
31 *rex_tre*, which are the default namespaces for the corresponding libraries.
30 is used in place of either *rex_posix*, *rex_pcre*, *rex_pcre2*, *rex_gnu*,
31 *rex_onig* or *rex_tre*, which are the default namespaces for the corresponding
32 libraries.
3233
3334 4. All functions that take a regular expression pattern as an argument will
3435 generate an error if that pattern is found invalid by the regex library.
3738 too. In this case, the cf_ and larg_ arguments are ignored (should
3839 be either supplied as nils or omitted).
3940
40 6. All functions that take a string-type subject accept a table (in Lua >= 5.2)
41 or userdata that has a ``topointer`` method and ``__len`` metamethod, and
42 take the subject to be a block of memory starting at the address returned by
41 6. All functions that take a string-type subject accept a table or userdata that
42 has a ``topointer`` method and ``__len`` metamethod, and take the subject to
43 be a block of memory starting at the address returned by
4344 ``subject:topointer()`` and of length ``#subject``. This works with buffer
4445 objects from the alien library (https://github.com/mascarenhas/alien). Note
4546 that special attention is needed with POSIX regex libraries that do not
4647 support ``REG_STARTEND``, and hence need NUL-terminated subjects: the NUL is
47 not included in the string length, so alien buffers must be wrapped to
48 report a length that excludes the NUL.
48 not included in the string length, so alien buffers must be wrapped to report
49 a length that excludes the NUL.
4950
5051 .. _cf:
5152
5253 7. The default value for *compilation flags* (*cf*) that Lrexlib uses when
5354 the parameter is not supplied or ``nil`` is:
5455
55 * REG_EXTENDED for POSIX and TRE
56 * 0 for PCRE
57 * ONIG_OPTION_NONE for Oniguruma
58 * SYNTAX_POSIX_EXTENDED for GNU
59
60 **PCRE**, **Oniguruma**: *cf* may also be supplied as a string, whose
61 characters stand for compilation flags. Combinations of the following
56 * ``REG_EXTENDED`` for POSIX and TRE
57 * ``0`` for PCRE and PCRE2
58 * ``ONIG_OPTION_NONE`` for Oniguruma
59 * ``SYNTAX_POSIX_EXTENDED`` for GNU
60
61 **PCRE**, **PCRE2**, **Oniguruma**: *cf* may also be supplied as a string,
62 whose characters stand for compilation flags. Combinations of the following
6263 characters (case sensitive) are supported:
6364
64 =============== ================== ==============================
65 **Character** **PCRE flag** **Oniguruma flag**
66 =============== ================== ==============================
67 **i** PCRE_CASELESS ONIG_OPTION_IGNORECASE
68 **m** PCRE_MULTILINE ONIG_OPTION_NEGATE_SINGLELINE
69 **s** PCRE_DOTALL ONIG_OPTION_MULTILINE
70 **x** PCRE_EXTENDED ONIG_OPTION_EXTEND
71 **U** PCRE_UNGREEDY n/a
72 **X** PCRE_EXTRA n/a
73 =============== ================== ==============================
65 =============== ================== ================== ==============================
66 **Character** **PCRE flag** **PCRE2 flag** **Oniguruma flag**
67 =============== ================== ================== ==============================
68 **i** PCRE_CASELESS PCRE2_CASELESS ONIG_OPTION_IGNORECASE
69 **m** PCRE_MULTILINE PCRE2_MULTILINE ONIG_OPTION_NEGATE_SINGLELINE
70 **s** PCRE_DOTALL PCRE2_DOTALL ONIG_OPTION_MULTILINE
71 **x** PCRE_EXTENDED PCRE2_EXTENDED ONIG_OPTION_EXTEND
72 **U** PCRE_UNGREEDY PCRE2_UNGREEDY n/a
73 **X** PCRE_EXTRA n/a n/a
74 =============== ================== ================== ==============================
7475
7576 .. _ef:
7677
7778 8. The default value for *execution flags* (*ef*) that Lrexlib uses when
7879 the parameter is not supplied or ``nil``, is:
7980
80 * 0 for standard POSIX regex library
81 * REG_STARTEND for those POSIX regex libraries that support it,
82 e.g. Spencer's.
83 * 0 for PCRE, Oniguruma and TRE
81 * ``0`` for standard POSIX regex library
82 * ``REG_STARTEND`` for those POSIX regex libraries that support it, e.g. Spencer's
83 * ``0`` for PCRE, PCRE2, Oniguruma and TRE
8484
8585 .. _larg:
8686
8787 9. The notation *larg...* is used to indicate optional library-specific
8888 arguments, which are documented in the ``new`` method of each library.
89
90 10. In the functions searching for multiple matches (``gmatch``, ``gsub``,
91 ``split``, ``count``) every empty match adjacent to the previous match
92 is discarded, e.g. ``rex.count("abc",".*")`` will return 1.
8993
9094 ------------------------------------------------------------
9195
222226 *subj* and replaces them according to the parameters *repl* and *n* (see details
223227 below).
224228
225 +---------+-----------------------------------+-------------------------+-------------+
226 |Parameter| Description | Type |Default Value|
227 +=========+===================================+=========================+=============+
228 | subj |subject | string | n/a |
229 +---------+-----------------------------------+-------------------------+-------------+
230 | patt |regular expression pattern |string or userdata | n/a |
231 +---------+-----------------------------------+-------------------------+-------------+
232 | repl |substitution source |string, function, table, | n/a |
233 | | |``false`` or ``nil`` | |
234 +---------+-----------------------------------+-------------------------+-------------+
235 | [n] |maximum number of matches to search| number or function | ``nil`` |
236 | |for, or control function, or nil | | |
237 +---------+-----------------------------------+-------------------------+-------------+
238 | [cf] |compilation flags (bitwise OR) | number | cf_ |
239 +---------+-----------------------------------+-------------------------+-------------+
240 | [ef] |execution flags (bitwise OR) | number | ef_ |
241 +---------+-----------------------------------+-------------------------+-------------+
242 |[larg...]|library-specific arguments | | |
243 +---------+-----------------------------------+-------------------------+-------------+
229 +---------+-----------------------------------+--------------------------+-------------+
230 |Parameter| Description | Type |Default Value|
231 +=========+===================================+==========================+=============+
232 | subj |subject | string | n/a |
233 +---------+-----------------------------------+--------------------------+-------------+
234 | patt |regular expression pattern |string or userdata | n/a |
235 +---------+-----------------------------------+--------------------------+-------------+
236 | repl |substitution source |string, function or table | n/a |
237 +---------+-----------------------------------+--------------------------+-------------+
238 | [n] |maximum number of matches to search| number or function | ``nil`` |
239 | |for, or control function, or nil | | |
240 +---------+-----------------------------------+--------------------------+-------------+
241 | [cf] |compilation flags (bitwise OR) | number | cf_ |
242 +---------+-----------------------------------+--------------------------+-------------+
243 | [ef] |execution flags (bitwise OR) | number | ef_ |
244 +---------+-----------------------------------+--------------------------+-------------+
245 |[larg...]|library-specific arguments | | |
246 +---------+-----------------------------------+--------------------------+-------------+
244247
245248 **Returns:**
246249 1. The subject string with the substitutions made.
248251 3. Number of substitutions made.
249252
250253 **Details:**
251 The parameter *repl* can be either a string, a function, a table,
252 ``false`` or ``nil``. On each match made, it is converted into a
253 value *repl_out* that may be used for the replacement.
254 The parameter *repl* can be either a string, a function or a table.
255 On each match made, it is converted into a value *repl_out* that may be used
256 for the replacement.
254257
255258 *repl_out* is generated differently depending on the type of *repl*:
256259
289292 same rules as for the return value of *repl* call, described in the above
290293 paragraph.
291294
292 4. If *repl* is ``false`` or ``nil``, no replacement is done. Note
293 that, unusually for Lua, if ``repl`` is absent, it is not taken
294 to be ``nil``. This is to prevent programming errors caused by
295 inadvertently missing out *repl*.
296
297295 Note: Under some circumstances, the value of *repl_out* may be ignored; see
298296 below_.
299297
377375
378376 ------------------------------------------------------------
379377
378 count
379 -----
380
381 :funcdef:`rex.count (subj, patt, [cf], [ef], [larg...])`
382
383 This function counts matches of the pattern *patt* in the string *subj*.
384
385 +---------+-----------------------------------+--------------------------+-------------+
386 |Parameter| Description | Type |Default Value|
387 +=========+===================================+==========================+=============+
388 | subj |subject | string | n/a |
389 +---------+-----------------------------------+--------------------------+-------------+
390 | patt |regular expression pattern |string or userdata | n/a |
391 +---------+-----------------------------------+--------------------------+-------------+
392 | [cf] |compilation flags (bitwise OR) | number | cf_ |
393 +---------+-----------------------------------+--------------------------+-------------+
394 | [ef] |execution flags (bitwise OR) | number | ef_ |
395 +---------+-----------------------------------+--------------------------+-------------+
396 |[larg...]|library-specific arguments | | |
397 +---------+-----------------------------------+--------------------------+-------------+
398
399 **Returns:**
400 1. Number of matches found.
401
402 ------------------------------------------------------------
403
380404 flags
381405 -----
382406
407431 constants in the used library. They are formed as follows:
408432
409433 * **POSIX**, **TRE**: prefix REG\_ is omitted, e.g. REG_ICASE becomes ``"ICASE"``.
410 * **PCRE:** prefix PCRE\_ is omitted, e.g. PCRE_CASELESS becomes
411 ``"CASELESS"``.
434 * **PCRE:** prefix PCRE\_ is omitted, e.g. PCRE_CASELESS becomes ``"CASELESS"``.
435 * **PCRE2:** prefix PCRE2\_ is omitted, e.g. PCRE2_CASELESS becomes ``"CASELESS"``.
412436 * **Oniguruma:** names of constants are converted to strings with no alteration,
413437 but for ONIG_OPTION_xxx constants, alias strings are created additionally,
414438 e.g., the value of ONIG_OPTION_IGNORECASE constant becomes accessible via
479503 result, in a table. This table contains ``false`` in the positions where the
480504 corresponding sub-pattern did not participate in the match.
481505
482 1. **PCRE**, **Oniguruma**: if *named subpatterns* are used then the table
483 also contains substring matches keyed by their correspondent subpattern
484 names (strings).
506 1. **PCRE**, **PCRE2**, **Oniguruma**: if *named subpatterns* are used then
507 the table also contains substring matches keyed by their corresponding
508 subpattern names (strings).
485509
486510 **Returns on failure:**
487511 1. ``nil``
517541 positions where the corresponding sub-pattern did not participate in the
518542 match.
519543
520 1. **PCRE**, **Oniguruma**: if *named subpatterns* are used then the table
521 also contains substring matches keyed by their correspondent subpattern
522 names (strings).
544 1. **PCRE**, **PCRE2**, **Oniguruma**: if *named subpatterns* are used then
545 the table also contains substring matches keyed by their corresponding
546 subpattern names (strings).
523547
524548 **Returns on failure:**
525549 1. ``nil``
540564 :funcdef:`rex.new (patt, [cf], [lo])`
541565
542566 The locale (*lo*) can be either a string (e.g., "French_France.1252"), or a
543 userdata obtained from a call to maketables_. The default value, used when the
544 parameter is not supplied or ``nil``, is the built-in PCRE set of character
567 userdata obtained from a call to maketables__. The default value, used when
568 the parameter is not supplied or ``nil``, is the built-in PCRE set of character
545569 tables.
570
571 __ maketables_pcre_
572
573 ------------------------------------------------------------
574
575 fullinfo
576 --------
577
578 [See *pcre_fullinfo* in the PCRE docs.]
579
580 :funcdef:`r:fullinfo ()`
581
582 This function returns a table containing information about the compiled pattern.
583 The keys are strings formed in the following way:
584 ``PCRE_INFO_CAPTURECOUNT`` -> ``"CAPTURECOUNT"``. The values are numbers.
585
586 ------------------------------------------------------------
587
588 .. _dfa_exec_pcre:
546589
547590 dfa_exec
548591 --------
587630
588631 ------------------------------------------------------------
589632
633 .. _maketables_pcre:
634
590635 maketables
591636 ----------
592637
624669 :funcdef:`rex_pcre.version ()`
625670
626671 This function returns a string containing the version of the used PCRE library
672 and its release date.
673
674 ------------------------------------------------------------
675
676 PCRE2-only functions and methods
677 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
678
679 new
680 ---
681
682 :funcdef:`rex.new (patt, [cf], [lo])`
683
684 The locale (*lo*) can be either a string (e.g., "French_France.1252"), or a
685 userdata obtained from a call to maketables__. The default value, used when
686 the parameter is not supplied or ``nil``, is the built-in PCRE2 set of character
687 tables.
688
689 __ maketables_pcre2_
690
691 ------------------------------------------------------------
692
693 patterninfo
694 -----------
695
696 [See *pcre2_pattern_info* in the PCRE2 docs.]
697
698 :funcdef:`r:patterninfo ()`
699
700 This function returns a table containing information about the compiled pattern.
701 The keys are strings formed in the following way:
702 ``PCRE2_INFO_CAPTURECOUNT`` -> ``"CAPTURECOUNT"``. The values are numbers.
703
704 ------------------------------------------------------------
705
706 dfa_exec
707 --------
708
709 [See *pcre2_dfa_match* in the PCRE2 docs.]
710
711 :funcdef:`r:dfa_exec (subj, [init], [ef], [ovecsize], [wscount])`
712
713 The method matches a compiled regular expression *r* against a given subject
714 string *subj*, using a DFA matching algorithm.
715
716 +----------+-------------------------------------+--------+-------------+
717 |Parameter | Description | Type |Default Value|
718 +==========+=====================================+========+=============+
719 | r |regex object produced by new |userdata| n/a |
720 +----------+-------------------------------------+--------+-------------+
721 | subj |subject | string | n/a |
722 +----------+-------------------------------------+--------+-------------+
723 | [init] |start offset in the subject | number | 1 |
724 | |(can be negative) | | |
725 +----------+-------------------------------------+--------+-------------+
726 | [ef] |execution flags (bitwise OR) | number | ef_ |
727 +----------+-------------------------------------+--------+-------------+
728 |[ovecsize]|size of the array for result offsets | number | 100 |
729 +----------+-------------------------------------+--------+-------------+
730 |[wscount] |number of elements in the working | number | 50 |
731 | |space array | | |
732 +----------+-------------------------------------+--------+-------------+
733
734 **Returns on success (either full or partial match):**
735 1. The start point of the matches found (a number).
736 2. A table containing the end points of the matches found, the longer matches
737 first.
738 3. The return value of the underlying *pcre2_dfa_match* call (a number).
739
740 **Returns on failure (no match):**
741 1. ``nil``
742
743 **Example:**
744 If there are 3 matches found starting at offset 10 and ending at offsets 15, 20
745 and 25 then the function returns the following: 10, { 25,20,15 }, 3.
746
747 ------------------------------------------------------------
748
749 jit_compile
750 -----------
751
752 [See *pcre2_jit_compile* in the PCRE2 docs.]
753
754 :funcdef:`r:jit_compile ([options])`
755
756 Parameter *options* is a number (a bitwise OR of separate options;
757 it defaults to ``PCRE2_JIT_COMPLETE``).
758
759 The method returns ``true`` on success or ``false`` + error message string on failure.
760
761 ------------------------------------------------------------
762
763 .. _maketables_pcre2:
764
765 maketables
766 ----------
767
768 [See *pcre2_maketables* in the PCRE2 docs.]
769
770 :funcdef:`rex_pcre2.maketables ()`
771
772 Creates a set of character tables corresponding to the current locale and
773 returns it as a userdata. The returned value can be passed to any Lrexlib
774 function accepting the *locale* parameter.
775
776 ------------------------------------------------------------
777
778 config
779 ------
780
781 [See *pcre2_config* in the PCRE2 docs.]
782
783 :funcdef:`rex_pcre2.config ([tb])`
784
785 This function returns a table containing the values of the configuration
786 parameters used at PCRE2 library build-time. Those parameters (numbers) are
787 keyed by their names (strings). If the table argument *tb* is supplied then it
788 is used as the output table, else a new table is created.
789
790 ------------------------------------------------------------
791
792 version
793 -------
794
795 [See *pcre2_config(PCRE2_CONFIG_VERSION)* in the PCRE2 docs.]
796
797 :funcdef:`rex_pcre2.version ()`
798
799 This function returns a string containing the version of the used PCRE2 library
627800 and its release date.
628801
629802 ------------------------------------------------------------
701874
702875 This function returns a string containing the version of the used Oniguruma
703876 library.
877
878 ------------------------------------------------------------
879
880 capturecount
881 ------------
882
883 [See *onig_number_of_captures* in the Oniguruma docs.]
884
885 :funcdef:`r:capturecount ()`
886
887 Returns the number of captures in the pattern.
704888
705889 ------------------------------------------------------------
706890
8471031 Incompatibilities with previous versions
8481032 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
8491033
1034 **Incompatibilities between versions 2.8 and 2.7:**
1035
1036 1. In the functions searching for multiple matches every empty match adjacent
1037 to the previous match is discarded.
1038
8501039 **Incompatibilities between versions 2.6 and 2.5:**
8511040
8521041 1. Removed function ``plainfind``.
8631052
8641053 **Incompatibilities between versions 2.1 and 2.0:**
8651054
866 1. match_, find_, tfind_, exec_, dfa_exec_: only one value (a ``nil``) is
1055 1. match_, find_, tfind_, exec_, dfa_exec__: only one value (a ``nil``) is
8671056 returned when the subject does not match the pattern. Any other failure
8681057 generates an error.
1058
1059 __ dfa_exec_pcre_
8691060
8701061 **Incompatibilities between versions 2.0 and 1.19:**
8711062
0 -- Generate the rockspecs
0 -- Generate rockspecs from a prototype with variants
11
2 require "std"
2 local tree = require "std.tree"
33
4 if select ("#", ...) < 1 then
5 io.stderr:write "Usage: mkrockspecs VERSION\n"
4 if select ("#", ...) < 2 then
5 io.stderr:write "Usage: mkrockspecs PACKAGE VERSION\n"
66 os.exit ()
77 end
88
9 version = select (1, ...)
9 package_name = select (1, ...)
10 version = select (2, ...)
1011
1112 function format (x, indent)
1213 indent = indent or ""
2021 for i, v in ipairs (x) do
2122 s = s..indent..format (v, indent.." ")..",\n"
2223 end
23 return s..indent:sub(1, -3).."}"
24 return s..indent:sub (1, -3).."}"
2425 elseif type (x) == "string" then
2526 return string.format ("%q", x)
2627 else
2829 end
2930 end
3031
32 flavour = "" -- a global, visible in loadfile
3133 for f, spec in pairs (loadfile ("rockspecs.lua") ()) do
3234 if f ~= "default" then
33 local specfile = "lrexlib-"..f:lower ().."-"..version.."-1.rockspec"
35 local specfile = package_name.."-"..(f ~= "" and f:lower ().."-" or "")..version.."-1.rockspec"
3436 h = io.open (specfile, "w")
3537 assert (h)
36 flavour = f -- a global, visible in loadfile
38 flavour = f
3739 local specs = loadfile ("rockspecs.lua") () -- reload to get current flavour interpolated
38 local spec = table.merge (specs.default, specs[f])
40 local spec = tree.merge (tree (specs.default), tree (specs[f]))
3941 local s = ""
4042 for i, v in pairs (spec) do
4143 s = s..i.." = "..format (v, " ").."\n"
44 -- flavour: regex library
55 -- version
66
7 local flavours = {"PCRE", "POSIX", "oniguruma", "TRE", "GNU"}
7 local flavours = {"PCRE", "PCRE2", "POSIX", "oniguruma", "TRE", "GNU"}
88 local version_dashed = version:gsub ("%.", "-")
9 -- FIXME: PCRE2 define should be only in PCRE2 rockspec
10 local defines = {"VERSION=\""..version.."\"",
11 "PCRE2_CODE_UNIT_WIDTH=8"}
912
1013 -- FIXME: When Lua 5.1 support is dropped, use an env argument with
1114 -- loadfile instead of wrapping in a table
2124 description = {
2225 summary = "Regular expression library binding ("..flavour.." flavour).",
2326 detailed = [[
24 Lrexlib is a regular expression library for Lua 5.1 and 5.2, which
27 Lrexlib is a regular expression library for Lua 5.1-5.4, which
2528 provides bindings for several regular expression libraries.
2629 This rock provides the ]]..flavour..[[ bindings.]],
2730 homepage = "http://github.com/rrthomas/lrexlib",
4346 type = "builtin",
4447 modules = {
4548 rex_pcre = {
46 defines = {"VERSION=\""..version.."\""},
49 defines = defines,
4750 sources = {"src/common.c", "src/pcre/lpcre.c", "src/pcre/lpcre_f.c"},
4851 libraries = {"pcre"},
4952 incdirs = {"$(PCRE_INCDIR)"},
5053 libdirs = {"$(PCRE_LIBDIR)"}
54 }
55 }
56 }
57 },
58
59 PCRE2 = {
60 external_dependencies = {
61 PCRE2 = {
62 header = "pcre2.h",
63 library = "pcre2-8"
64 }
65 },
66 build = {
67 type = "builtin",
68 modules = {
69 rex_pcre2 = {
70 defines = defines,
71 sources = {"src/common.c", "src/pcre2/lpcre2.c", "src/pcre2/lpcre2_f.c"},
72 libraries = {"pcre2-8"},
73 incdirs = {"$(PCRE2_INCDIR)"},
74 libdirs = {"$(PCRE2_LIBDIR)"}
5175 }
5276 }
5377 }
6387 type = "builtin",
6488 modules = {
6589 rex_posix = {
66 defines = {"VERSION=\""..version.."\""},
90 defines = defines,
6791 sources = {"src/common.c", "src/posix/lposix.c"}
6892 }
6993 }
81105 type = "builtin",
82106 modules = {
83107 rex_onig = {
84 defines = {"VERSION=\""..version.."\""},
108 defines = defines,
85109 sources = {"src/common.c", "src/oniguruma/lonig.c", "src/oniguruma/lonig_f.c"},
86110 libraries = {"onig"},
87111 incdirs = {"$(ONIG_INCDIR)"},
102126 type = "builtin",
103127 modules = {
104128 rex_tre = {
105 defines = {"VERSION=\""..version.."\""},
129 defines = defines,
106130 sources = {"src/common.c", "src/tre/ltre.c" --[[, "src/tre/tre_w.c"]]},
107131 libraries = {"tre"},
108132 incdirs = {"$(TRE_INCDIR)"},
122146 type = "builtin",
123147 modules = {
124148 rex_gnu = {
125 defines = {"VERSION=\""..version.."\""},
149 defines = defines,
126150 sources = {"src/common.c", "src/gnu/lgnu.c"}
127151 }
128152 }
88 static void gmatch_pushsubject (lua_State *L, TArgExec *argE);
99 static int findmatch_exec (TUserdata *ud, TArgExec *argE);
1010 static int split_exec (TUserdata *ud, TArgExec *argE, int offset);
11 static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset);
12 static int gmatch_exec (TUserdata *ud, TArgExec *argE);
1113 static int compile_regex (lua_State *L, const TArgComp *argC, TUserdata **pud);
1214 static int generate_error (lua_State *L, const TUserdata *ud, int errcode);
1315
3234 #ifndef DO_NAMED_SUBPATTERNS
3335 #define DO_NAMED_SUBPATTERNS(a,b,c)
3436 #endif
35
36 /* When doing an iterative search, there can occur a situation of a zero-length
37 * match at the current position, that prevents further advance on the subject
38 * string.
39 * There are two ways to handle that (AFAIK):
40 * a) Advance by one character (continue the search from the next position),
41 * or
42 * b) Search for a non-zero-length match that begins from the current
43 * position ("retry" the search). If the match is not found then advance
44 * by one character.
45 * The "b)" seems more correct, but most regex libraries expose no API for that.
46 * The known exception is PCRE that has flags PCRE_NOTEMPTY and PCRE_ANCHORED.
47 */
48 #ifdef ALG_USERETRY
49 #define SET_RETRY(a,b) (a=b)
50 static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset, int retry);
51 static int gmatch_exec (TUserdata *ud, TArgExec *argE, int retry);
52 #define GSUB_EXEC gsub_exec
53 #define GMATCH_EXEC gmatch_exec
54 #else
55 #define SET_RETRY(a,b) ((void)a)
56 static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset);
57 static int gmatch_exec (TUserdata *ud, TArgExec *argE);
58 #define GSUB_EXEC(a,b,c,d) gsub_exec(a,b,c)
59 #define GMATCH_EXEC(a,b,c) gmatch_exec(a,b)
60 #endif
61
6237
6338 #define METHOD_FIND 0
6439 #define METHOD_MATCH 1
8055
8156
8257 static int get_startoffset(lua_State *L, int stackpos, size_t len) {
83 int startoffset = luaL_optint(L, stackpos, 1);
58 int startoffset = (int)luaL_optinteger(L, stackpos, 1);
8459 if(startoffset > 0)
8560 startoffset--;
8661 else if(startoffset < 0) {
131106 if (type != LUA_TLIGHTUSERDATA)
132107 luaL_error (L, "subject's topointer method returned %s (expected lightuserdata)",
133108 lua_typename (L, type));
134 argE->text = lua_touserdata (L, -1);
109 argE->text = (const char*) lua_touserdata (L, -1);
135110 lua_pop (L, 1);
136111 #if LUA_VERSION_NUM == 501
137 lua_objlen (L, pos);
112 if (luaL_callmeta (L, pos, "__len")) {
113 if (lua_type (L, -1) != LUA_TNUMBER)
114 luaL_argerror (L, pos, "subject's length is not a number");
115 argE->textlen = lua_tointeger (L, -1);
116 lua_pop (L, 1);
117 }
118 else
119 argE->textlen = lua_objlen (L, pos);
138120 #else
139 lua_len (L, pos);
140 #endif
141 type = lua_type (L, -1);
142 if (type != LUA_TNUMBER)
143 luaL_error (L, "subject's length is %s (expected number)",
144 lua_typename (L, type));
145 argE->textlen = lua_tointeger (L, -1);
146 lua_pop (L, 1);
121 argE->textlen = luaL_len (L, pos);
122 #endif
147123 }
148124 }
149125
154130 argC->ud = NULL;
155131 }
156132 else if ((argC->ud = test_ud (L, pos)) == NULL)
157 luaL_typerror(L, pos, "string or "REX_TYPENAME);
133 luaL_typerror(L, pos, "string or " REX_TYPENAME);
158134 }
159135
160136 static void checkarg_new (lua_State *L, TArgComp *argC) {
171147 lua_tostring (L, 3); /* converts number (if any) to string */
172148 argE->reptype = lua_type (L, 3);
173149 if (argE->reptype != LUA_TSTRING && argE->reptype != LUA_TTABLE &&
174 argE->reptype != LUA_TFUNCTION && argE->reptype != LUA_TNIL &&
175 (argE->reptype != LUA_TBOOLEAN ||
176 (argE->reptype == LUA_TBOOLEAN && lua_toboolean (L, 3)))) {
177 luaL_typerror (L, 3, "string, table, function, false or nil");
150 argE->reptype != LUA_TFUNCTION) {
151 luaL_typerror (L, 3, "string, table or function");
178152 }
179153 argE->funcpos = 3;
180154 argE->funcpos2 = 4;
181155 argE->maxmatch = OptLimit (L, 4);
182156 argC->cflags = ALG_GETCFLAGS (L, 5);
183 argE->eflags = luaL_optint (L, 6, ALG_EFLAGS_DFLT);
157 argE->eflags = (int)luaL_optinteger (L, 6, ALG_EFLAGS_DFLT);
184158 ALG_GETCARGS (L, 7, argC);
159 }
160
161
162 /* function count (s, patt, [cf], [ef], [larg...]) */
163 static void checkarg_count (lua_State *L, TArgComp *argC, TArgExec *argE) {
164 check_subject (L, 1, argE);
165 check_pattern (L, 2, argC);
166 argC->cflags = ALG_GETCFLAGS (L, 3);
167 argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT);
168 ALG_GETCARGS (L, 5, argC);
185169 }
186170
187171
192176 check_pattern (L, 2, argC);
193177 argE->startoffset = get_startoffset (L, 3, argE->textlen);
194178 argC->cflags = ALG_GETCFLAGS (L, 4);
195 argE->eflags = luaL_optint (L, 5, ALG_EFLAGS_DFLT);
179 argE->eflags = (int)luaL_optinteger (L, 5, ALG_EFLAGS_DFLT);
196180 ALG_GETCARGS (L, 6, argC);
197181 }
198182
203187 check_subject (L, 1, argE);
204188 check_pattern (L, 2, argC);
205189 argC->cflags = ALG_GETCFLAGS (L, 3);
206 argE->eflags = luaL_optint (L, 4, ALG_EFLAGS_DFLT);
190 argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT);
207191 ALG_GETCARGS (L, 5, argC);
208192 }
209193
216200 *ud = check_ud (L);
217201 check_subject (L, 2, argE);
218202 argE->startoffset = get_startoffset (L, 3, argE->textlen);
219 argE->eflags = luaL_optint (L, 4, ALG_EFLAGS_DFLT);
203 argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT);
220204 }
221205
222206
243227 TUserdata *ud;
244228 TArgComp argC;
245229 TArgExec argE;
246 int n_match = 0, n_subst = 0, st = 0, retry;
230 int n_match = 0, n_subst = 0, st = 0, last_to = -1;
247231 TBuffer BufOut, BufRep, BufTemp, *pBuf = &BufOut;
248232 TFreeList freelist;
249233 /*------------------------------------------------------------------*/
266250 }
267251 /*------------------------------------------------------------------*/
268252 buffer_init (&BufOut, 1024, L, &freelist);
269 SET_RETRY (retry, 0);
270253 while ((argE.maxmatch < 0 || n_match < argE.maxmatch) && st <= (int)argE.textlen) {
271254 int from, to, res;
272255 int curr_subst = 0;
273 res = GSUB_EXEC (ud, &argE, st, retry);
256 res = gsub_exec (ud, &argE, st);
274257 if (ALG_NOMATCH (res)) {
275 #ifdef ALG_USERETRY
276 if (retry) {
277 if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */
278 buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
279 st += ALG_CHARSIZE;
280 retry = 0;
281 continue;
282 }
283 }
284 #endif
285258 break;
286259 }
287260 else if (!ALG_ISMATCH (res)) {
288261 freelist_free (&freelist);
289262 return generate_error (L, ud, res);
290263 }
291 ++n_match;
292264 from = ALG_BASE(st) + ALG_SUBBEG(ud,0);
293265 to = ALG_BASE(st) + ALG_SUBEND(ud,0);
266 if (to == last_to) { /* discard an empty match adjacent to the previous match */
267 if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */
268 buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
269 st += ALG_CHARSIZE;
270 continue;
271 }
272 break;
273 }
274 last_to = to;
275 ++n_match;
294276 if (st < from) {
295277 buffer_addlstring (&BufOut, argE.text + st, from - st);
296278 #ifdef ALG_PULL
333315 freelist_free (&freelist);
334316 return lua_error (L); /* re-raise the error */
335317 }
336 }
337 /*----------------------------------------------------------------*/
338 else if (argE.reptype == LUA_TNIL || argE.reptype == LUA_TBOOLEAN) {
339 buffer_addlstring (pBuf, argE.text + from, to - from);
340318 }
341319 /*----------------------------------------------------------------*/
342320 if (argE.reptype == LUA_TTABLE || argE.reptype == LUA_TFUNCTION) {
400378 n_subst += curr_subst;
401379 if (st < to) {
402380 st = to;
403 SET_RETRY (retry, 0);
404381 }
405382 else if (st < (int)argE.textlen) {
406 #ifdef ALG_USERETRY
407 retry = 1;
408 #else
409383 /* advance by 1 char (not replaced) */
410384 buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
411385 st += ALG_CHARSIZE;
412 #endif
413386 }
414387 else break;
415388 }
420393 lua_pushinteger (L, n_subst);
421394 freelist_free (&freelist);
422395 return 3;
396 }
397
398
399 static int algf_count (lua_State *L) {
400 TUserdata *ud;
401 TArgComp argC;
402 TArgExec argE;
403 int n_match = 0, st = 0, last_to = -1;
404 /*------------------------------------------------------------------*/
405 checkarg_count (L, &argC, &argE);
406 if (argC.ud) {
407 ud = (TUserdata*) argC.ud;
408 lua_pushvalue (L, 2);
409 }
410 else compile_regex (L, &argC, &ud);
411 /*------------------------------------------------------------------*/
412 while (st <= (int)argE.textlen) {
413 int to, res;
414 res = gsub_exec (ud, &argE, st);
415 if (ALG_NOMATCH (res)) {
416 break;
417 }
418 else if (!ALG_ISMATCH (res)) {
419 return generate_error (L, ud, res);
420 }
421 to = ALG_BASE(st) + ALG_SUBEND(ud,0);
422 if (to == last_to) { /* discard an empty match adjacent to the previous match */
423 if (st < (int)argE.textlen) { /* advance by 1 char */
424 st += ALG_CHARSIZE;
425 continue;
426 }
427 break;
428 }
429 last_to = to;
430 ++n_match;
431 #ifdef ALG_PULL
432 {
433 int from = ALG_BASE(st) + ALG_SUBBEG(ud,0);
434 if (st < from)
435 st = from;
436 }
437 #endif
438 /*----------------------------------------------------------------*/
439 if (st < to) {
440 st = to;
441 }
442 else if (st < (int)argE.textlen) {
443 /* advance by 1 char (not replaced) */
444 st += ALG_CHARSIZE;
445 }
446 else break;
447 }
448 /*------------------------------------------------------------------*/
449 lua_pushinteger (L, n_match);
450 return 1;
423451 }
424452
425453
475503
476504
477505 static int gmatch_iter (lua_State *L) {
478 int retry;
506 int last_end, res;
479507 TArgExec argE;
480508 TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1));
481509 argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen);
482510 argE.eflags = lua_tointeger (L, lua_upvalueindex (3));
483511 argE.startoffset = lua_tointeger (L, lua_upvalueindex (4));
484 #ifdef ALG_USERETRY
485 retry = lua_tointeger (L, lua_upvalueindex (5));
486 #endif
487
488 if (argE.startoffset > (int)argE.textlen)
489 return 0;
512 last_end = lua_tointeger (L, lua_upvalueindex (5));
490513
491514 while (1) {
492 int res = GMATCH_EXEC (ud, &argE, retry);
515 if (argE.startoffset > (int)argE.textlen)
516 return 0;
517 res = gmatch_exec (ud, &argE);
493518 if (ALG_ISMATCH (res)) {
494519 int incr = 0;
495 if (ALG_SUBLEN(ud,0)) {
496 SET_RETRY (retry, 0);
497 }
498 else { /* no progress: prevent endless loop */
499 #ifdef ALG_USERETRY
500 SET_RETRY (retry, 1);
501 #else
520 if (!ALG_SUBLEN(ud,0)) { /* no progress: prevent endless loop */
521 if (last_end == ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0)) {
522 argE.startoffset += ALG_CHARSIZE;
523 continue;
524 }
502525 incr = ALG_CHARSIZE;
503 #endif
504 }
505 lua_pushinteger(L, ALG_BASE(argE.startoffset) + incr + ALG_SUBEND(ud,0)); /* update start offset */
526 }
527 last_end = ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0);
528 lua_pushinteger(L, last_end + incr); /* update start offset */
506529 lua_replace (L, lua_upvalueindex (4));
507 #ifdef ALG_USERETRY
508 lua_pushinteger (L, retry);
509 lua_replace (L, lua_upvalueindex (5)); /* update retry */
510 #endif
530 lua_pushinteger(L, last_end); /* update last end of match */
531 lua_replace (L, lua_upvalueindex (5));
511532 /* push either captures or entire match */
512533 if (ALG_NSUB(ud)) {
513534 push_substrings (L, ud, argE.text, NULL);
518539 return 1;
519540 }
520541 }
521 else if (ALG_NOMATCH (res)) {
522 #ifdef ALG_USERETRY
523 if (retry) {
524 if (argE.startoffset < (int)argE.textlen) {
525 ++argE.startoffset; /* advance by 1 char */
526 SET_RETRY (retry, 0);
527 continue;
528 }
529 }
530 #endif
542 else if (ALG_NOMATCH (res))
531543 return 0;
532 }
533544 else
534545 return generate_error (L, ud, res);
535546 }
537548
538549
539550 static int split_iter (lua_State *L) {
540 int incr, newoffset, res;
551 int incr, last_end, newoffset, res;
541552 TArgExec argE;
542553 TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1));
543554 argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen);
544555 argE.eflags = lua_tointeger (L, lua_upvalueindex (3));
545556 argE.startoffset = lua_tointeger (L, lua_upvalueindex (4));
546557 incr = lua_tointeger (L, lua_upvalueindex (5));
547
548 if (argE.startoffset > (int)argE.textlen)
558 last_end = lua_tointeger (L, lua_upvalueindex (6));
559
560 if (incr < 0)
549561 return 0;
550562
551 if ((newoffset = argE.startoffset + incr) > (int)argE.textlen)
552 goto nomatch;
553
554 res = split_exec (ud, &argE, newoffset);
555 if (ALG_ISMATCH (res)) {
556 lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset */
557 lua_replace (L, lua_upvalueindex (4));
558 lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : ALG_CHARSIZE); /* update incr */
559 lua_replace (L, lua_upvalueindex (5));
560 /* push text preceding the match */
561 lua_pushlstring (L, argE.text + argE.startoffset,
562 ALG_SUBBEG(ud,0) + ALG_BASE(newoffset) - argE.startoffset);
563 /* push either captures or entire match */
564 if (ALG_NSUB(ud)) {
565 push_substrings (L, ud, argE.text + ALG_BASE(newoffset), NULL);
566 return 1 + ALG_NSUB(ud);
567 }
568 else {
569 ALG_PUSHSUB (L, ud, argE.text + ALG_BASE(newoffset), 0);
570 return 2;
571 }
572 }
573 else if (ALG_NOMATCH (res))
574 goto nomatch;
575 else
576 return generate_error (L, ud, res);
577
578 nomatch:
579 lua_pushinteger (L, argE.textlen + 1); /* mark as last iteration */
580 lua_replace (L, lua_upvalueindex (4)); /* update start offset */
563 while (1) {
564 if ((newoffset = argE.startoffset + incr) > (int)argE.textlen)
565 break;
566 res = split_exec (ud, &argE, newoffset);
567 if (ALG_ISMATCH (res)) {
568 if (!ALG_SUBLEN(ud,0)) { /* no progress: prevent endless loop */
569 if (last_end == ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0)) {
570 incr += ALG_CHARSIZE;
571 continue;
572 }
573 }
574 lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset and last_end */
575 lua_pushvalue (L, -1);
576 lua_replace (L, lua_upvalueindex (4));
577 lua_replace (L, lua_upvalueindex (6));
578 lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : ALG_CHARSIZE); /* update incr */
579 lua_replace (L, lua_upvalueindex (5));
580 /* push text preceding the match */
581 lua_pushlstring (L, argE.text + argE.startoffset,
582 ALG_SUBBEG(ud,0) + ALG_BASE(newoffset) - argE.startoffset);
583 /* push either captures or entire match */
584 if (ALG_NSUB(ud)) {
585 push_substrings (L, ud, argE.text + ALG_BASE(newoffset), NULL);
586 return 1 + ALG_NSUB(ud);
587 }
588 else {
589 ALG_PUSHSUB (L, ud, argE.text + ALG_BASE(newoffset), 0);
590 return 2;
591 }
592 }
593 else if (ALG_NOMATCH (res))
594 break;
595 else
596 return generate_error (L, ud, res);
597 }
598 lua_pushinteger (L, -1); /* mark as last iteration */
599 lua_replace (L, lua_upvalueindex (5)); /* incr = -1 */
581600 lua_pushlstring (L, argE.text+argE.startoffset, argE.textlen-argE.startoffset);
582601 return 1;
583602 }
587606 {
588607 TArgComp argC;
589608 TArgExec argE;
590 TUserdata *ud;
591609 checkarg_gmatch_split (L, &argC, &argE);
592 if (argC.ud) {
593 ud = (TUserdata*) argC.ud;
610 if (argC.ud)
594611 lua_pushvalue (L, 2);
595 }
596 else compile_regex (L, &argC, &ud); /* 1-st upvalue: ud */
612 else
613 compile_regex (L, &argC, NULL); /* 1-st upvalue: ud */
597614 gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */
598615 lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */
599616 lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */
600 #ifdef ALG_USERETRY
601 lua_pushinteger (L, 0); /* 5-th upvalue: retry */
617 lua_pushinteger (L, -1); /* 5-th upvalue: last end of match */
602618 lua_pushcclosure (L, gmatch_iter, 5);
603 #else
604 lua_pushcclosure (L, gmatch_iter, 4);
605 #endif
606619 return 1;
607620 }
608621
610623 {
611624 TArgComp argC;
612625 TArgExec argE;
613 TUserdata *ud;
614626 checkarg_gmatch_split (L, &argC, &argE);
615 if (argC.ud) {
616 ud = (TUserdata*) argC.ud;
627 if (argC.ud)
617628 lua_pushvalue (L, 2);
618 }
619 else compile_regex (L, &argC, &ud); /* 1-st upvalue: ud */
629 else
630 compile_regex (L, &argC, NULL); /* 1-st upvalue: ud */
620631 gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */
621632 lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */
622633 lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */
623634 lua_pushinteger (L, 0); /* 5-th upvalue: incr */
624 lua_pushcclosure (L, split_iter, 5);
635 lua_pushinteger (L, -1); /* 6-th upvalue: last_end */
636 lua_pushcclosure (L, split_iter, 6);
625637 return 1;
626638 }
627639
734746 #endif
735747 lua_pushfstring (L, REX_VERSION" (for %s)", name);
736748 lua_setfield (L, -2, "_VERSION");
737 }
749 #ifndef REX_NOEMBEDDEDTEST
750 lua_pushcfunction (L, newmembuffer);
751 lua_setfield (L, -2, "_newmembuffer");
752 #endif
753 }
125125 enum { ID_NUMBER, ID_STRING };
126126
127127 void buffer_init (TBuffer *buf, size_t sz, lua_State *L, TFreeList *fl) {
128 buf->arr = Lmalloc(L, sz);
128 buf->arr = (char*) Lmalloc(L, sz);
129129 if (!buf->arr) {
130130 freelist_free (fl);
131131 luaL_error (L, "malloc failed");
263263 return luaL_argerror(L, narg, msg);
264264 }
265265 #endif
266
267 #ifndef REX_NOEMBEDDEDTEST
268 static int ud_topointer (lua_State *L) {
269 lua_pushlightuserdata (L, lua_touserdata (L, 1));
270 return 1;
271 }
272
273 static int ud_len (lua_State *L) {
274 lua_pushinteger (L, lua_objlen (L, 1));
275 return 1;
276 }
277
278 /* for testing purposes only */
279 int newmembuffer (lua_State *L) {
280 size_t len;
281 const char* s = luaL_checklstring (L, 1, &len);
282 void *ud = lua_newuserdata (L, len);
283 memcpy (ud, s, len);
284 lua_newtable (L); /* metatable */
285 lua_pushvalue (L, -1);
286 lua_setfield (L, -2, "__index"); /* metatable.__index = metatable */
287 lua_pushcfunction (L, ud_topointer);
288 lua_setfield (L, -2, "topointer");
289 lua_pushcfunction (L, ud_len);
290 lua_setfield (L, -2, "__len");
291 lua_setmetatable (L, -2);
292 return 1;
293 }
294 #endif /* #ifndef REX_NOEMBEDDEDTEST */
66 #include "lua.h"
77
88 #if LUA_VERSION_NUM > 501
9 # define lua_objlen lua_rawlen
910 int luaL_typerror (lua_State *L, int narg, const char *tname);
1011 #endif
1112
9495 void *Lrealloc (lua_State *L, void *p, size_t osize, size_t nsize);
9596 void Lfree (lua_State *L, void *p, size_t size);
9697
98 #ifndef REX_NOEMBEDDEDTEST
99 int newmembuffer (lua_State *L);
97100 #endif
101
102 #endif
3232 #define ALG_CFLAGS_DFLT RE_SYNTAX_POSIX_EXTENDED
3333 #define ALG_EFLAGS_DFLT 0
3434
35 #define ALG_GETCFLAGS(L,pos) luaL_optint(L, pos, ALG_CFLAGS_DFLT)
35 #define ALG_GETCFLAGS(L,pos) (int)luaL_optinteger(L, pos, ALG_CFLAGS_DFLT)
3636
3737 static const unsigned char *gettranslate (lua_State *L, int pos);
3838 #define ALG_GETCARGS(L,pos,argC) argC->translate = gettranslate (L, pos)
4949 lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n))
5050
5151 #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \
52 (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))
52 (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))
5353
5454 #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1)
5555 #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n))
298298 { "find", algf_find },
299299 { "gmatch", algf_gmatch },
300300 { "gsub", algf_gsub },
301 { "count", algf_count },
301302 { "split", algf_split },
302303 { "new", algf_new },
303304 { "flags", Gnu_get_flags },
5454
5555 #define ALG_BASE(st) 0
5656 #define ALG_PULL
57 /* #define ALG_USERETRY */
5857
5958 typedef struct {
6059 regex_t *reg;
102101
103102 static int generate_error (lua_State *L, const TOnig *ud, int errcode) {
104103 char buf [ONIG_MAX_ERROR_MESSAGE_LEN];
105 onig_error_code_to_str(buf, errcode, ud->einfo);
104 onig_error_code_to_str((unsigned char*) buf, errcode, &ud->einfo);
106105 return luaL_error(L, buf);
107106 }
108107
206205 */
207206 static int LOnig_setdefaultsyntax (lua_State *L) {
208207 (void)luaL_checkstring(L, 1);
209 onig_set_default_syntax(getsyntax(L, 1));
208 onig_set_default_syntax((OnigSyntaxType*) getsyntax(L, 1));
210209 return 0;
211210 }
212211
286285 return gsub_exec(ud, argE, st);
287286 }
288287
288 static int LOnig_capturecount (lua_State *L) {
289 TOnig *ud = check_ud(L);
290 lua_pushinteger(L, onig_number_of_captures(ud->reg));
291 return 1;
292 }
293
289294 static int LOnig_gc (lua_State *L) {
290295 TOnig *ud = check_ud (L);
291296 if (ud->reg) { /* precaution against "manual" __gc calling */
313318 return 1;
314319 }
315320
321 static int LOnig_internal_test (lua_State *L) {
322 unsigned int i;
323 for (i=1; i<sizeof(Encodings)/sizeof(Encodings[0]); i++) {
324 if (fcmp(&Encodings[i-1], &Encodings[i]) >= 0) {
325 lua_pushboolean(L, 0);
326 lua_pushstring(L, "Array 'Encodings' is not properly sorted.");
327 return 2;
328 }
329 }
330 for (i=1; i<sizeof(Syntaxes)/sizeof(Syntaxes[0]); i++) {
331 if (fcmp(&Syntaxes[i-1], &Syntaxes[i]) >= 0) {
332 lua_pushboolean(L, 0);
333 lua_pushstring(L, "Array 'Syntaxes' is not properly sorted.");
334 return 2;
335 }
336 }
337 lua_pushboolean(L, 1);
338 return 1;
339 }
340
316341 static const luaL_Reg r_methods[] = {
317342 { "exec", algm_exec },
318343 { "tfind", algm_tfind }, /* old name: match */
319344 { "find", algm_find },
320345 { "match", algm_match },
346 { "capturecount", LOnig_capturecount },
321347 { "__gc", LOnig_gc },
322348 { "__tostring", LOnig_tostring },
323349 { NULL, NULL }
328354 { "find", algf_find },
329355 { "gmatch", algf_gmatch },
330356 { "gsub", algf_gsub },
357 { "count", algf_count },
331358 { "split", algf_split },
332359 { "new", algf_new },
333360 { "flags", LOnig_get_flags },
334361 { "version", LOnig_version },
335362 { "setdefaultsyntax", LOnig_setdefaultsyntax },
363 { "internal_test", LOnig_internal_test },
336364 { NULL, NULL }
337365 };
338366
44 #include <string.h>
55 #include <locale.h>
66 #include <ctype.h>
7 #include <stdint.h>
78 #include <pcre.h>
89
910 #include "lua.h"
4748 lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n))
4849
4950 #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \
50 (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))
51 (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))
5152
5253 #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1)
5354 #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n))
5657
5758 #define ALG_BASE(st) 0
5859 #define ALG_PULL
59 #define ALG_USERETRY
6060
6161 typedef struct {
6262 pcre * pr;
8585 /* Functions
8686 ******************************************************************************
8787 */
88
89 static void push_chartables_meta (lua_State *L) {
90 lua_pushinteger (L, INDEX_CHARTABLES_META);
91 lua_rawget (L, ALG_ENVIRONINDEX);
92 }
9388
9489 static int getcflags (lua_State *L, int pos) {
9590 switch (lua_type (L, pos)) {
131126 *ud = check_ud (L);
132127 argE->text = luaL_checklstring (L, 2, &argE->textlen);
133128 argE->startoffset = get_startoffset (L, 3, argE->textlen);
134 argE->eflags = luaL_optint (L, 4, ALG_EFLAGS_DFLT);
135 argE->ovecsize = luaL_optint (L, 5, 100);
136 argE->wscount = luaL_optint (L, 6, 50);
137 }
138 #endif
129 argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT);
130 argE->ovecsize = (size_t)luaL_optinteger (L, 5, 100);
131 argE->wscount = (size_t)luaL_optinteger (L, 6, 50);
132 }
133 #endif
134
135 static void push_chartables_meta (lua_State *L) {
136 lua_pushinteger (L, INDEX_CHARTABLES_META);
137 lua_rawget (L, ALG_ENVIRONINDEX);
138 }
139139
140140 static int Lpcre_maketables (lua_State *L) {
141141 *(const void**)lua_newuserdata (L, sizeof(void*)) = pcre_maketables();
168168 return 0;
169169 }
170170
171 static int chartables_tostring (lua_State *L) {
172 void **ud = check_chartables (L, 1);
173 lua_pushfstring (L, "%s (%p)", chartables_typename, ud);
174 return 1;
175 }
176
171177 static void checkarg_compile (lua_State *L, int pos, TArgComp *argC) {
172178 argC->locale = NULL;
173179 argC->tables = NULL;
176182 argC->locale = lua_tostring (L, pos);
177183 else {
178184 argC->tablespos = pos;
179 argC->tables = *check_chartables (L, pos);
185 argC->tables = (const unsigned char*) *check_chartables (L, pos);
180186 }
181187 }
182188 }
295301 }
296302 #endif /* #if PCRE_MAJOR >= 6 */
297303
298 #ifdef ALG_USERETRY
299 static int gmatch_exec (TUserdata *ud, TArgExec *argE, int retry) {
300 int eflags = retry ? (argE->eflags|PCRE_NOTEMPTY|PCRE_ANCHORED) : argE->eflags;
301 return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
302 argE->startoffset, eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
303 }
304 #else
305 static int gmatch_exec (TUserdata *ud, TArgExec *argE) {
306 return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
307 argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
308 }
309 #endif
304 static int gmatch_exec (TUserdata *ud, TArgExec *argE) {
305 return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
306 argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
307 }
310308
311309 static void gmatch_pushsubject (lua_State *L, TArgExec *argE) {
312310 lua_pushlstring (L, argE->text, argE->textlen);
317315 argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
318316 }
319317
320 #ifdef ALG_USERETRY
321 static int gsub_exec (TPcre *ud, TArgExec *argE, int st, int retry) {
322 int eflags = retry ? (argE->eflags|PCRE_NOTEMPTY|PCRE_ANCHORED) : argE->eflags;
323 return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
324 st, eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
325 }
326 #else
327 static int gsub_exec (TPcre *ud, TArgExec *argE, int st) {
328 return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
329 st, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
330 }
331 #endif
318 static int gsub_exec (TPcre *ud, TArgExec *argE, int st) {
319 return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
320 st, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
321 }
332322
333323 static int split_exec (TPcre *ud, TArgExec *argE, int offset) {
334324 return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, offset,
356346 return 1;
357347 }
358348
359 static int chartables_tostring (lua_State *L) {
360 void **ud = check_chartables (L, 1);
361 lua_pushfstring (L, "%s (%p)", chartables_typename, ud);
362 return 1;
363 }
364
365349 static int Lpcre_version (lua_State *L) {
366350 lua_pushstring (L, pcre_version ());
351 return 1;
352 }
353
354 #define SET_INFO_FIELD(L,ud,what,name,valtype) { \
355 valtype val; \
356 if (0 == pcre_fullinfo (ud->pr, ud->extra, what, &val)) { \
357 lua_pushnumber (L, val); \
358 lua_setfield (L, -2, name); \
359 } \
360 }
361
362 static int Lpcre_fullinfo (lua_State *L) {
363 TPcre *ud = check_ud (L);
364 lua_newtable(L);
365
366 SET_INFO_FIELD (L, ud, PCRE_INFO_BACKREFMAX, "BACKREFMAX", int)
367 SET_INFO_FIELD (L, ud, PCRE_INFO_CAPTURECOUNT, "CAPTURECOUNT", int)
368 SET_INFO_FIELD (L, ud, PCRE_INFO_FIRSTBYTE, "FIRSTBYTE", int)
369 SET_INFO_FIELD (L, ud, PCRE_INFO_HASCRORLF, "HASCRORLF", int)
370 SET_INFO_FIELD (L, ud, PCRE_INFO_JCHANGED, "JCHANGED", int)
371 #ifdef PCRE_INFO_JIT
372 SET_INFO_FIELD (L, ud, PCRE_INFO_JIT, "JIT", int)
373 #endif
374 #ifdef PCRE_INFO_JITSIZE
375 SET_INFO_FIELD (L, ud, PCRE_INFO_JITSIZE, "JITSIZE", size_t);
376 #endif
377 #ifdef PCRE_INFO_MATCH_EMPTY
378 SET_INFO_FIELD (L, ud, PCRE_INFO_MATCH_EMPTY, "MATCH_EMPTY", int)
379 #endif
380 #ifdef PCRE_INFO_MATCHLIMIT
381 SET_INFO_FIELD (L, ud, PCRE_INFO_MATCHLIMIT, "MATCHLIMIT", uint32_t)
382 #endif
383 #ifdef PCRE_INFO_MAXLOOKBEHIND
384 SET_INFO_FIELD (L, ud, PCRE_INFO_MAXLOOKBEHIND, "MAXLOOKBEHIND", int) /* int ? */
385 #endif
386 #ifdef PCRE_INFO_MINLENGTH
387 SET_INFO_FIELD (L, ud, PCRE_INFO_MINLENGTH, "MINLENGTH", int)
388 #endif
389 SET_INFO_FIELD (L, ud, PCRE_INFO_OKPARTIAL, "OKPARTIAL", int)
390 SET_INFO_FIELD (L, ud, PCRE_INFO_OPTIONS, "OPTIONS", unsigned long)
391 #ifdef PCRE_INFO_RECURSIONLIMIT
392 SET_INFO_FIELD (L, ud, PCRE_INFO_RECURSIONLIMIT, "RECURSIONLIMIT", uint32_t)
393 #endif
394 SET_INFO_FIELD (L, ud, PCRE_INFO_SIZE, "SIZE", size_t)
395 SET_INFO_FIELD (L, ud, PCRE_INFO_STUDYSIZE, "STUDYSIZE", size_t)
396 #ifdef PCRE_INFO_FIRSTCHARACTERFLAGS
397 SET_INFO_FIELD (L, ud, PCRE_INFO_FIRSTCHARACTERFLAGS, "FIRSTCHARACTERFLAGS", int)
398 #endif
399 #ifdef PCRE_INFO_FIRSTCHARACTER
400 SET_INFO_FIELD (L, ud, PCRE_INFO_FIRSTCHARACTER, "FIRSTCHARACTER", uint32_t)
401 #endif
402 #ifdef PCRE_INFO_REQUIREDCHARFLAGS
403 SET_INFO_FIELD (L, ud, PCRE_INFO_REQUIREDCHARFLAGS, "REQUIREDCHARFLAGS", int)
404 #endif
405 #ifdef PCRE_INFO_REQUIREDCHAR
406 SET_INFO_FIELD (L, ud, PCRE_INFO_REQUIREDCHAR, "REQUIREDCHAR", uint32_t)
407 #endif
408
367409 return 1;
368410 }
369411
381423 #if PCRE_MAJOR >= 6
382424 { "dfa_exec", Lpcre_dfa_exec },
383425 #endif
426 { "fullinfo", Lpcre_fullinfo },
384427 { "__gc", Lpcre_gc },
385428 { "__tostring", Lpcre_tostring },
386429 { NULL, NULL }
391434 { "find", algf_find },
392435 { "gmatch", algf_gmatch },
393436 { "gsub", algf_gsub },
437 { "count", algf_count },
394438 { "split", algf_split },
395439 { "new", algf_new },
396440 { "flags", Lpcre_get_flags },
0 /* lpcre2.c - Lua binding of PCRE2 library */
1 /* See Copyright Notice in the file LICENSE */
2
3 #include <stdlib.h>
4 #include <string.h>
5 #include <locale.h>
6 #include <ctype.h>
7 #include <stdint.h>
8 #include <pcre2.h>
9
10 #include "lua.h"
11 #include "lauxlib.h"
12 #include "../common.h"
13
14 extern int Lpcre2_get_flags (lua_State *L);
15 extern int Lpcre2_config (lua_State *L);
16 extern flag_pair pcre2_error_flags[];
17
18 /* These 2 settings may be redefined from the command-line or the makefile.
19 * They should be kept in sync between themselves and with the target name.
20 */
21 #ifndef REX_LIBNAME
22 # define REX_LIBNAME "rex_pcre2"
23 #endif
24 #ifndef REX_OPENLIB
25 # define REX_OPENLIB luaopen_rex_pcre2
26 #endif
27
28 #define REX_TYPENAME REX_LIBNAME"_regex"
29
30 #define ALG_CFLAGS_DFLT 0
31 #define ALG_EFLAGS_DFLT 0
32
33 static int getcflags (lua_State *L, int pos);
34 #define ALG_GETCFLAGS(L,pos) getcflags(L, pos)
35
36 static void checkarg_compile (lua_State *L, int pos, TArgComp *argC);
37 #define ALG_GETCARGS(a,b,c) checkarg_compile(a,b,c)
38
39 #define ALG_NOMATCH(res) ((res) == PCRE2_ERROR_NOMATCH)
40 #define ALG_ISMATCH(res) ((res) >= 0)
41 #define ALG_SUBBEG(ud,n) ((int)(ud)->ovector[(n)+(n)])
42 #define ALG_SUBEND(ud,n) ((int)(ud)->ovector[(n)+(n)+1])
43 #define ALG_SUBLEN(ud,n) (ALG_SUBEND((ud),(n)) - ALG_SUBBEG((ud),(n)))
44 #define ALG_SUBVALID(ud,n) (0 == pcre2_substring_length_bynumber((ud)->match_data, (n), NULL))
45 #define ALG_NSUB(ud) ((int)(ud)->ncapt)
46
47 #define ALG_PUSHSUB(L,ud,text,n) \
48 lua_pushlstring (L, (text) + ALG_SUBBEG((ud),(n)), ALG_SUBLEN((ud),(n)))
49
50 #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \
51 (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))
52
53 #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1)
54 #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n))
55 #define ALG_PUSHOFFSETS(L,ud,offs,n) \
56 (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n))
57
58 #define ALG_BASE(st) 0
59 #define ALG_PULL
60
61 typedef struct {
62 pcre2_code *pr;
63 pcre2_compile_context *ccontext;
64 pcre2_match_data *match_data;
65 PCRE2_SIZE *ovector;
66 int ncapt;
67 const unsigned char *tables;
68 int freed;
69 } TPcre2;
70
71 #define TUserdata TPcre2
72
73 static void do_named_subpatterns (lua_State *L, TPcre2 *ud, const char *text);
74 # define DO_NAMED_SUBPATTERNS do_named_subpatterns
75
76 #include "../algo.h"
77
78 /* Locations of the 2 permanent tables in the function environment */
79 #define INDEX_CHARTABLES_META 1 /* chartables type's metatable */
80 #define INDEX_CHARTABLES_LINK 2 /* link chartables to compiled regex */
81
82 const char chartables_typename[] = "chartables";
83
84 /* Functions
85 ******************************************************************************
86 */
87
88 static int push_error_message (lua_State *L, int errorcode) //### is this function needed?
89 {
90 PCRE2_UCHAR buf[256];
91 if (pcre2_get_error_message(errorcode, buf, 256) > 0)
92 {
93 lua_pushstring(L, (const char*)buf);
94 return 1;
95 }
96 return 0;
97 }
98
99 static int getcflags (lua_State *L, int pos) {
100 switch (lua_type (L, pos)) {
101 case LUA_TNONE:
102 case LUA_TNIL:
103 return ALG_CFLAGS_DFLT;
104 case LUA_TNUMBER:
105 return lua_tointeger (L, pos);
106 case LUA_TSTRING: {
107 const char *s = lua_tostring (L, pos);
108 int res = 0, ch;
109 while ((ch = *s++) != '\0') {
110 if (ch == 'i') res |= PCRE2_CASELESS;
111 else if (ch == 'm') res |= PCRE2_MULTILINE;
112 else if (ch == 's') res |= PCRE2_DOTALL;
113 else if (ch == 'x') res |= PCRE2_EXTENDED;
114 else if (ch == 'U') res |= PCRE2_UNGREEDY;
115 //else if (ch == 'X') res |= PCRE2_EXTRA; //### does not exist in PCRE2 -> reflect in manual
116 }
117 return res;
118 }
119 default:
120 return luaL_typerror (L, pos, "number or string");
121 }
122 }
123
124 static int generate_error (lua_State *L, const TPcre2 *ud, int errcode) {
125 const char *key = get_flag_key (pcre2_error_flags, errcode);
126 (void) ud;
127 if (key)
128 return luaL_error (L, "error PCRE2_%s", key);
129 else
130 return luaL_error (L, "PCRE2 error code %d", errcode);
131 }
132
133 /* method r:dfa_exec (s, [st], [ef], [ovecsize], [wscount]) */
134 static void checkarg_dfa_exec (lua_State *L, TArgExec *argE, TPcre2 **ud) {
135 *ud = check_ud (L);
136 argE->text = luaL_checklstring (L, 2, &argE->textlen);
137 argE->startoffset = get_startoffset (L, 3, argE->textlen);
138 argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT);
139 argE->ovecsize = (size_t)luaL_optinteger (L, 5, 100);
140 argE->wscount = (size_t)luaL_optinteger (L, 6, 50);
141 }
142
143 static void push_chartables_meta (lua_State *L) {
144 lua_pushinteger (L, INDEX_CHARTABLES_META);
145 lua_rawget (L, ALG_ENVIRONINDEX);
146 }
147
148 static int Lpcre2_maketables (lua_State *L) {
149 *(const void**)lua_newuserdata (L, sizeof(void*)) = pcre2_maketables(NULL); //### argument NULL
150 push_chartables_meta (L);
151 lua_setmetatable (L, -2);
152 return 1;
153 }
154
155 static void **check_chartables (lua_State *L, int pos) {
156 void **q;
157 /* Compare the metatable against the C function environment. */
158 if (lua_getmetatable(L, pos)) {
159 push_chartables_meta (L);
160 if (lua_rawequal(L, -1, -2) &&
161 (q = (void **)lua_touserdata(L, pos)) != NULL) {
162 lua_pop(L, 2);
163 return q;
164 }
165 }
166 luaL_argerror(L, pos, lua_pushfstring (L, "not a %s", chartables_typename));
167 return NULL;
168 }
169
170 static int chartables_gc (lua_State *L) {
171 void **ud = check_chartables (L, 1);
172 if (*ud) {
173 free (*ud); //### free() should be called only if pcre2_maketables was called with NULL argument
174 *ud = NULL;
175 }
176 return 0;
177 }
178
179 static int chartables_tostring (lua_State *L) {
180 void **ud = check_chartables (L, 1);
181 lua_pushfstring (L, "%s (%p)", chartables_typename, ud);
182 return 1;
183 }
184
185 static void checkarg_compile (lua_State *L, int pos, TArgComp *argC) {
186 argC->locale = NULL;
187 argC->tables = NULL;
188 if (!lua_isnoneornil (L, pos)) {
189 if (lua_isstring (L, pos))
190 argC->locale = lua_tostring (L, pos);
191 else {
192 argC->tablespos = pos;
193 argC->tables = (const unsigned char*) *check_chartables (L, pos);
194 }
195 }
196 }
197
198 static int compile_regex (lua_State *L, const TArgComp *argC, TPcre2 **pud) {
199 int errcode;
200 PCRE2_SIZE erroffset;
201 TPcre2 *ud;
202
203 ud = (TPcre2*)lua_newuserdata (L, sizeof (TPcre2));
204 memset (ud, 0, sizeof (TPcre2)); /* initialize all members to 0 */
205 lua_pushvalue (L, ALG_ENVIRONINDEX);
206 lua_setmetatable (L, -2);
207
208 ud->ccontext = pcre2_compile_context_create(NULL);
209 if (ud->ccontext == NULL)
210 return luaL_error (L, "malloc failed");
211
212 if (argC->locale) {
213 char old_locale[256];
214 strcpy (old_locale, setlocale (LC_CTYPE, NULL)); /* store the locale */
215 if (NULL == setlocale (LC_CTYPE, argC->locale)) /* set new locale */
216 return luaL_error (L, "cannot set locale");
217 ud->tables = pcre2_maketables (NULL); /* make tables with new locale */ //### argument NULL
218 pcre2_set_character_tables(ud->ccontext, ud->tables);
219 setlocale (LC_CTYPE, old_locale); /* restore the old locale */
220 }
221 else if (argC->tables) {
222 pcre2_set_character_tables(ud->ccontext, argC->tables);
223 lua_pushinteger (L, INDEX_CHARTABLES_LINK);
224 lua_rawget (L, ALG_ENVIRONINDEX);
225 lua_pushvalue (L, -2);
226 lua_pushvalue (L, argC->tablespos);
227 lua_rawset (L, -3);
228 lua_pop (L, 1);
229 }
230
231 ud->pr = pcre2_compile ((PCRE2_SPTR)argC->pattern, argC->patlen, argC->cflags, &errcode,
232 &erroffset, ud->ccontext); //### DOUBLE-CHECK ALL ARGUMENTS
233 if (!ud->pr) {
234 if (push_error_message(L, errcode))
235 return luaL_error (L, "%s (pattern offset: %d)", lua_tostring(L,-1), erroffset + 1);
236 else
237 return luaL_error (L, "%s (pattern offset: %d)", "pattern compile error", erroffset + 1);
238 }
239
240 if (0 != pcre2_pattern_info (ud->pr, PCRE2_INFO_CAPTURECOUNT, &ud->ncapt)) //###
241 return luaL_error (L, "could not get pattern info");
242
243 /* need (2 ints per capture, plus one for substring match) * 3/2 */
244 ud->match_data = pcre2_match_data_create(ud->ncapt+1, NULL); //### CHECK ALL
245 if (!ud->match_data)
246 return luaL_error (L, "malloc failed");
247
248 ud->ovector = pcre2_get_ovector_pointer(ud->match_data);
249
250 if (pud) *pud = ud;
251 return 1;
252 }
253
254 /* the target table must be on lua stack top */
255 static void do_named_subpatterns (lua_State *L, TPcre2 *ud, const char *text) {
256 int i, namecount, name_entry_size;
257 unsigned char *name_table;
258 PCRE2_SPTR tabptr;
259
260 /* do named subpatterns - NJG */
261 pcre2_pattern_info (ud->pr, PCRE2_INFO_NAMECOUNT, &namecount);
262 if (namecount <= 0)
263 return;
264 pcre2_pattern_info (ud->pr, PCRE2_INFO_NAMETABLE, &name_table);
265 pcre2_pattern_info (ud->pr, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
266 tabptr = name_table;
267 for (i = 0; i < namecount; i++) {
268 int n = (tabptr[0] << 8) | tabptr[1]; /* number of the capturing parenthesis */
269 if (n > 0 && n <= ALG_NSUB(ud)) { /* check range */
270 lua_pushstring (L, (char *)tabptr + 2); /* name of the capture, zero terminated */
271 ALG_PUSHSUB_OR_FALSE (L, ud, text, n);
272 lua_rawset (L, -3);
273 }
274 tabptr += name_entry_size;
275 }
276 }
277
278 static int Lpcre2_dfa_exec (lua_State *L)
279 {
280 TArgExec argE;
281 TPcre2 *ud;
282 int res;
283 int *wspace;
284 size_t wsize;
285
286 checkarg_dfa_exec (L, &argE, &ud);
287 wsize = argE.wscount * sizeof(int);
288 wspace = (int*) Lmalloc (L, wsize);
289 if (!wspace)
290 luaL_error (L, "malloc failed");
291
292 ud->match_data = pcre2_match_data_create(argE.ovecsize/2, NULL); //### CHECK ALL
293 if (!ud->match_data)
294 return luaL_error (L, "malloc failed");
295
296 res = pcre2_dfa_match (ud->pr, (PCRE2_SPTR)argE.text, argE.textlen, argE.startoffset,
297 argE.eflags, ud->match_data, NULL, wspace, argE.wscount); //### CHECK ALL
298
299 if (ALG_ISMATCH (res) || res == PCRE2_ERROR_PARTIAL) {
300 int i;
301 int max = (res>0) ? res : (res==0) ? (int)argE.ovecsize/2 : 1;
302 PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(ud->match_data);
303
304 lua_pushinteger (L, ovector[0] + 1); /* 1-st return value */
305 lua_newtable (L); /* 2-nd return value */
306 for (i=0; i<max; i++) {
307 lua_pushinteger (L, ovector[i+i+1]);
308 lua_rawseti (L, -2, i+1);
309 }
310 lua_pushinteger (L, res); /* 3-rd return value */
311 Lfree (L, wspace, wsize);
312 return 3;
313 }
314 else {
315 Lfree (L, wspace, wsize);
316 if (ALG_NOMATCH (res))
317 return lua_pushnil (L), 1;
318 else
319 return generate_error (L, ud, res);
320 }
321 }
322
323 static int gmatch_exec (TUserdata *ud, TArgExec *argE) {
324 return pcre2_match (ud->pr, (PCRE2_SPTR)argE->text, argE->textlen,
325 argE->startoffset, argE->eflags, ud->match_data, NULL); //###
326 }
327
328 static void gmatch_pushsubject (lua_State *L, TArgExec *argE) {
329 lua_pushlstring (L, argE->text, argE->textlen);
330 }
331
332 static int findmatch_exec (TPcre2 *ud, TArgExec *argE) {
333 return pcre2_match (ud->pr, (PCRE2_SPTR)argE->text, argE->textlen,
334 argE->startoffset, argE->eflags, ud->match_data, NULL); //###
335 }
336
337 static int gsub_exec (TPcre2 *ud, TArgExec *argE, int st) {
338 return pcre2_match (ud->pr, (PCRE2_SPTR)argE->text, argE->textlen,
339 st, argE->eflags, ud->match_data, NULL); //###
340 }
341
342 static int split_exec (TPcre2 *ud, TArgExec *argE, int offset) {
343 return pcre2_match (ud->pr, (PCRE2_SPTR)argE->text, argE->textlen,
344 offset, argE->eflags, ud->match_data, NULL); //###
345 }
346
347 static int Lpcre2_gc (lua_State *L) {
348 TPcre2 *ud = check_ud (L);
349 if (ud->freed == 0) { /* precaution against "manual" __gc calling */
350 ud->freed = 1;
351 if (ud->pr) pcre2_code_free (ud->pr);
352 //if (ud->tables) pcre_free ((void *)ud->tables); //###
353 if (ud->ccontext) pcre2_compile_context_free (ud->ccontext);
354 if (ud->match_data) pcre2_match_data_free (ud->match_data);
355 }
356 return 0;
357 }
358
359 static int Lpcre2_tostring (lua_State *L) {
360 TPcre2 *ud = check_ud (L);
361 if (ud->freed == 0)
362 lua_pushfstring (L, "%s (%p)", REX_TYPENAME, (void*)ud);
363 else
364 lua_pushfstring (L, "%s (deleted)", REX_TYPENAME);
365 return 1;
366 }
367
368 static int Lpcre2_version (lua_State *L) {
369 char buf[64];
370 pcre2_config(PCRE2_CONFIG_VERSION, buf);
371 lua_pushstring (L, buf);
372 return 1;
373 }
374
375 //### TODO: document this method.
376 //### TODO: write tests for this method.
377 static int Lpcre2_jit_compile (lua_State *L) {
378 TPcre2 *ud = check_ud (L);
379 uint32_t options = (uint32_t) luaL_optinteger (L, 2, PCRE2_JIT_COMPLETE);
380 int errcode = pcre2_jit_compile (ud->pr, options);
381 if (errcode == 0) {
382 lua_pushboolean(L, 1);
383 return 1;
384 }
385 lua_pushboolean(L, 0);
386 return 1 + push_error_message(L, errcode);
387 }
388
/* Query one pcre2_pattern_info() item `what` of ud->pr into a temporary of
 * type `valtype`; on success store it as a number in field `name` of the
 * table on top of the Lua stack.  Expands to a braced block, so it is used
 * without a trailing semicolon. */
#define SET_INFO_FIELD(L,ud,what,name,valtype) { \
  valtype info_value_; \
  if (pcre2_pattern_info (ud->pr, what, &info_value_) == 0) { \
    lua_pushnumber (L, info_value_); \
    lua_setfield (L, -2, name); \
  } \
}
396
397 static int Lpcre2_pattern_info (lua_State *L) {
398 TPcre2 *ud = check_ud (L);
399 lua_newtable(L);
400
401 SET_INFO_FIELD (L, ud, PCRE2_INFO_ALLOPTIONS, "ALLOPTIONS", uint32_t)
402 SET_INFO_FIELD (L, ud, PCRE2_INFO_ARGOPTIONS, "ARGOPTIONS", uint32_t)
403 SET_INFO_FIELD (L, ud, PCRE2_INFO_BACKREFMAX, "BACKREFMAX", uint32_t)
404 SET_INFO_FIELD (L, ud, PCRE2_INFO_BSR, "BSR", uint32_t)
405 SET_INFO_FIELD (L, ud, PCRE2_INFO_CAPTURECOUNT, "CAPTURECOUNT", uint32_t)
406 //### SET_INFO_FIELD (L, ud, PCRE2_INFO_FIRSTBITMAP, "FIRSTBITMAP", ???)
407 SET_INFO_FIELD (L, ud, PCRE2_INFO_FIRSTCODETYPE, "FIRSTCODETYPE", uint32_t)
408 SET_INFO_FIELD (L, ud, PCRE2_INFO_FIRSTCODEUNIT, "FIRSTCODEUNIT", uint32_t)
409 SET_INFO_FIELD (L, ud, PCRE2_INFO_HASBACKSLASHC, "HASBACKSLASHC", uint32_t)
410 SET_INFO_FIELD (L, ud, PCRE2_INFO_HASCRORLF, "HASCRORLF", uint32_t)
411 SET_INFO_FIELD (L, ud, PCRE2_INFO_JCHANGED, "JCHANGED", uint32_t)
412 SET_INFO_FIELD (L, ud, PCRE2_INFO_JITSIZE, "JITSIZE", size_t)
413 SET_INFO_FIELD (L, ud, PCRE2_INFO_LASTCODETYPE, "LASTCODETYPE", uint32_t)
414 SET_INFO_FIELD (L, ud, PCRE2_INFO_LASTCODEUNIT, "LASTCODEUNIT", uint32_t)
415 SET_INFO_FIELD (L, ud, PCRE2_INFO_MATCHEMPTY, "MATCHEMPTY", uint32_t)
416 SET_INFO_FIELD (L, ud, PCRE2_INFO_MATCHLIMIT, "MATCHLIMIT", uint32_t)
417 SET_INFO_FIELD (L, ud, PCRE2_INFO_MAXLOOKBEHIND, "MAXLOOKBEHIND", uint32_t)
418 SET_INFO_FIELD (L, ud, PCRE2_INFO_MINLENGTH, "MINLENGTH", uint32_t)
419 SET_INFO_FIELD (L, ud, PCRE2_INFO_NAMECOUNT, "NAMECOUNT", uint32_t)
420 SET_INFO_FIELD (L, ud, PCRE2_INFO_NAMEENTRYSIZE, "NAMEENTRYSIZE", uint32_t)
421 //### SET_INFO_FIELD (L, ud, PCRE2_INFO_NAMETABLE, "NAMETABLE", ???)
422 SET_INFO_FIELD (L, ud, PCRE2_INFO_NEWLINE, "NEWLINE", uint32_t)
423 SET_INFO_FIELD (L, ud, PCRE2_INFO_RECURSIONLIMIT, "RECURSIONLIMIT", uint32_t)
424 SET_INFO_FIELD (L, ud, PCRE2_INFO_SIZE, "SIZE", size_t)
425
426 return 1;
427 }
428
429 static const luaL_Reg chartables_meta[] = {
430 { "__gc", chartables_gc },
431 { "__tostring", chartables_tostring },
432 { NULL, NULL }
433 };
434
435 static const luaL_Reg r_methods[] = {
436 { "exec", algm_exec },
437 { "tfind", algm_tfind }, /* old name: match */
438 { "find", algm_find },
439 { "match", algm_match },
440 { "dfa_exec", Lpcre2_dfa_exec },
441 { "patterninfo", Lpcre2_pattern_info }, //### document name change: fullinfo -> patterninfo
442 { "fullinfo", Lpcre2_pattern_info }, //### compatibility name
443 { "jit_compile", Lpcre2_jit_compile },
444 { "__gc", Lpcre2_gc },
445 { "__tostring", Lpcre2_tostring },
446 { NULL, NULL }
447 };
448
449 static const luaL_Reg r_functions[] = {
450 { "match", algf_match },
451 { "find", algf_find },
452 { "gmatch", algf_gmatch },
453 { "gsub", algf_gsub },
454 { "count", algf_count },
455 { "split", algf_split },
456 { "new", algf_new },
457 { "flags", Lpcre2_get_flags },
458 { "version", Lpcre2_version },
459 { "maketables", Lpcre2_maketables },
460 { "config", Lpcre2_config },
461 { NULL, NULL }
462 };
463
464 /* Open the library */
465 REX_API int REX_OPENLIB (lua_State *L) {
466 char buf_ver[64];
467 pcre2_config(PCRE2_CONFIG_VERSION, buf_ver);
468 if (PCRE2_MAJOR > atoi (buf_ver)) {
469 return luaL_error (L, "%s requires at least version %d of PCRE2 library",
470 REX_LIBNAME, (int)PCRE2_MAJOR);
471 }
472
473 alg_register(L, r_methods, r_functions, "PCRE2");
474
475 /* create a table and register it as a metatable for "chartables" userdata */
476 lua_newtable (L);
477 lua_pushliteral (L, "access denied");
478 lua_setfield (L, -2, "__metatable");
479 #if LUA_VERSION_NUM == 501
480 luaL_register (L, NULL, chartables_meta);
481 lua_rawseti (L, LUA_ENVIRONINDEX, INDEX_CHARTABLES_META);
482 #else
483 lua_pushvalue(L, -3);
484 luaL_setfuncs (L, chartables_meta, 1);
485 lua_rawseti (L, -3, INDEX_CHARTABLES_META);
486 #endif
487
488 /* create a table for connecting "chartables" userdata to "regex" userdata */
489 lua_newtable (L);
490 lua_pushliteral (L, "k"); /* weak keys */
491 lua_setfield (L, -2, "__mode");
492 lua_pushvalue (L, -1); /* setmetatable (tb, tb) */
493 lua_setmetatable (L, -2);
494 #if LUA_VERSION_NUM == 501
495 lua_rawseti (L, LUA_ENVIRONINDEX, INDEX_CHARTABLES_LINK);
496 #else
497 lua_rawseti (L, -3, INDEX_CHARTABLES_LINK);
498 #endif
499
500 return 1;
501 }
0 /* lpcre2_f.c - Lua binding of PCRE2 library */
1 /* See Copyright Notice in the file LICENSE */
2
3 #include <pcre2.h>
4 #include "lua.h"
5 #include "lauxlib.h"
6 #include "../common.h"
7
8 #define VERSION_PCRE2 (PCRE2_MAJOR*100 + PCRE2_MINOR)
9
10 static flag_pair pcre2_flags[] = {
11 { "MAJOR", PCRE2_MAJOR },
12 { "MINOR", PCRE2_MINOR },
13 /*---------------------------------------------------------------------------*/
14 { "ANCHORED", PCRE2_ANCHORED },
15 { "NO_UTF_CHECK", PCRE2_NO_UTF_CHECK },
16 { "ALLOW_EMPTY_CLASS", PCRE2_ALLOW_EMPTY_CLASS },
17 { "ALT_BSUX", PCRE2_ALT_BSUX },
18 { "AUTO_CALLOUT", PCRE2_AUTO_CALLOUT },
19 { "CASELESS", PCRE2_CASELESS },
20 { "DOLLAR_ENDONLY", PCRE2_DOLLAR_ENDONLY },
21 { "DOTALL", PCRE2_DOTALL },
22 { "DUPNAMES", PCRE2_DUPNAMES },
23 { "EXTENDED", PCRE2_EXTENDED },
24 { "FIRSTLINE", PCRE2_FIRSTLINE },
25 { "MATCH_UNSET_BACKREF", PCRE2_MATCH_UNSET_BACKREF },
26 { "MULTILINE", PCRE2_MULTILINE },
27 { "NEVER_UCP", PCRE2_NEVER_UCP },
28 { "NEVER_UTF", PCRE2_NEVER_UTF },
29 { "NO_AUTO_CAPTURE", PCRE2_NO_AUTO_CAPTURE },
30 { "NO_AUTO_POSSESS", PCRE2_NO_AUTO_POSSESS },
31 { "NO_DOTSTAR_ANCHOR", PCRE2_NO_DOTSTAR_ANCHOR },
32 { "NO_START_OPTIMIZE", PCRE2_NO_START_OPTIMIZE },
33 { "UCP", PCRE2_UCP },
34 { "UNGREEDY", PCRE2_UNGREEDY },
35 { "UTF", PCRE2_UTF },
36 { "NEVER_BACKSLASH_C", PCRE2_NEVER_BACKSLASH_C },
37 { "ALT_CIRCUMFLEX", PCRE2_ALT_CIRCUMFLEX },
38 { "ALT_VERBNAMES", PCRE2_ALT_VERBNAMES },
39 { "USE_OFFSET_LIMIT", PCRE2_USE_OFFSET_LIMIT },
40 { "JIT_COMPLETE", PCRE2_JIT_COMPLETE },
41 { "JIT_PARTIAL_SOFT", PCRE2_JIT_PARTIAL_SOFT },
42 { "JIT_PARTIAL_HARD", PCRE2_JIT_PARTIAL_HARD },
43 { "NOTBOL", PCRE2_NOTBOL },
44 { "NOTEOL", PCRE2_NOTEOL },
45 { "NOTEMPTY", PCRE2_NOTEMPTY },
46 { "NOTEMPTY_ATSTART", PCRE2_NOTEMPTY_ATSTART },
47 { "PARTIAL_SOFT", PCRE2_PARTIAL_SOFT },
48 { "PARTIAL_HARD", PCRE2_PARTIAL_HARD },
49 { "DFA_RESTART", PCRE2_DFA_RESTART },
50 { "DFA_SHORTEST", PCRE2_DFA_SHORTEST },
51 { "SUBSTITUTE_GLOBAL", PCRE2_SUBSTITUTE_GLOBAL },
52 { "SUBSTITUTE_EXTENDED", PCRE2_SUBSTITUTE_EXTENDED },
53 { "SUBSTITUTE_UNSET_EMPTY", PCRE2_SUBSTITUTE_UNSET_EMPTY },
54 { "SUBSTITUTE_UNKNOWN_UNSET", PCRE2_SUBSTITUTE_UNKNOWN_UNSET },
55 { "SUBSTITUTE_OVERFLOW_LENGTH", PCRE2_SUBSTITUTE_OVERFLOW_LENGTH },
56 #ifdef PCRE2_NO_JIT
57 { "NO_JIT", PCRE2_NO_JIT },
58 #endif
59 { "NEWLINE_CR", PCRE2_NEWLINE_CR },
60 { "NEWLINE_LF", PCRE2_NEWLINE_LF },
61 { "NEWLINE_CRLF", PCRE2_NEWLINE_CRLF },
62 { "NEWLINE_ANY", PCRE2_NEWLINE_ANY },
63 { "NEWLINE_ANYCRLF", PCRE2_NEWLINE_ANYCRLF },
64 { "BSR_UNICODE", PCRE2_BSR_UNICODE },
65 { "BSR_ANYCRLF", PCRE2_BSR_ANYCRLF },
66 /*---------------------------------------------------------------------------*/
67 { "INFO_ALLOPTIONS", PCRE2_INFO_ALLOPTIONS },
68 { "INFO_ARGOPTIONS", PCRE2_INFO_ARGOPTIONS },
69 { "INFO_BACKREFMAX", PCRE2_INFO_BACKREFMAX },
70 { "INFO_BSR", PCRE2_INFO_BSR },
71 { "INFO_CAPTURECOUNT", PCRE2_INFO_CAPTURECOUNT },
72 { "INFO_FIRSTCODEUNIT", PCRE2_INFO_FIRSTCODEUNIT },
73 { "INFO_FIRSTCODETYPE", PCRE2_INFO_FIRSTCODETYPE },
74 { "INFO_FIRSTBITMAP", PCRE2_INFO_FIRSTBITMAP },
75 { "INFO_HASCRORLF", PCRE2_INFO_HASCRORLF },
76 { "INFO_JCHANGED", PCRE2_INFO_JCHANGED },
77 { "INFO_JITSIZE", PCRE2_INFO_JITSIZE },
78 { "INFO_LASTCODEUNIT", PCRE2_INFO_LASTCODEUNIT },
79 { "INFO_LASTCODETYPE", PCRE2_INFO_LASTCODETYPE },
80 { "INFO_MATCHEMPTY", PCRE2_INFO_MATCHEMPTY },
81 { "INFO_MATCHLIMIT", PCRE2_INFO_MATCHLIMIT },
82 { "INFO_MAXLOOKBEHIND", PCRE2_INFO_MAXLOOKBEHIND },
83 { "INFO_MINLENGTH", PCRE2_INFO_MINLENGTH },
84 { "INFO_NAMECOUNT", PCRE2_INFO_NAMECOUNT },
85 { "INFO_NAMEENTRYSIZE", PCRE2_INFO_NAMEENTRYSIZE },
86 { "INFO_NAMETABLE", PCRE2_INFO_NAMETABLE },
87 { "INFO_NEWLINE", PCRE2_INFO_NEWLINE },
88 { "INFO_RECURSIONLIMIT", PCRE2_INFO_RECURSIONLIMIT },
89 { "INFO_SIZE", PCRE2_INFO_SIZE },
90 { "INFO_HASBACKSLASHC", PCRE2_INFO_HASBACKSLASHC },
91 /*---------------------------------------------------------------------------*/
92 { NULL, 0 }
93 };
94
95 flag_pair pcre2_error_flags[] = {
96 { "ERROR_NOMATCH", PCRE2_ERROR_NOMATCH },
97 { "ERROR_PARTIAL", PCRE2_ERROR_PARTIAL },
98 { "ERROR_UTF8_ERR1", PCRE2_ERROR_UTF8_ERR1 },
99 { "ERROR_UTF8_ERR2", PCRE2_ERROR_UTF8_ERR2 },
100 { "ERROR_UTF8_ERR3", PCRE2_ERROR_UTF8_ERR3 },
101 { "ERROR_UTF8_ERR4", PCRE2_ERROR_UTF8_ERR4 },
102 { "ERROR_UTF8_ERR5", PCRE2_ERROR_UTF8_ERR5 },
103 { "ERROR_UTF8_ERR6", PCRE2_ERROR_UTF8_ERR6 },
104 { "ERROR_UTF8_ERR7", PCRE2_ERROR_UTF8_ERR7 },
105 { "ERROR_UTF8_ERR8", PCRE2_ERROR_UTF8_ERR8 },
106 { "ERROR_UTF8_ERR9", PCRE2_ERROR_UTF8_ERR9 },
107 { "ERROR_UTF8_ERR10", PCRE2_ERROR_UTF8_ERR10 },
108 { "ERROR_UTF8_ERR11", PCRE2_ERROR_UTF8_ERR11 },
109 { "ERROR_UTF8_ERR12", PCRE2_ERROR_UTF8_ERR12 },
110 { "ERROR_UTF8_ERR13", PCRE2_ERROR_UTF8_ERR13 },
111 { "ERROR_UTF8_ERR14", PCRE2_ERROR_UTF8_ERR14 },
112 { "ERROR_UTF8_ERR15", PCRE2_ERROR_UTF8_ERR15 },
113 { "ERROR_UTF8_ERR16", PCRE2_ERROR_UTF8_ERR16 },
114 { "ERROR_UTF8_ERR17", PCRE2_ERROR_UTF8_ERR17 },
115 { "ERROR_UTF8_ERR18", PCRE2_ERROR_UTF8_ERR18 },
116 { "ERROR_UTF8_ERR19", PCRE2_ERROR_UTF8_ERR19 },
117 { "ERROR_UTF8_ERR20", PCRE2_ERROR_UTF8_ERR20 },
118 { "ERROR_UTF8_ERR21", PCRE2_ERROR_UTF8_ERR21 },
119 { "ERROR_UTF16_ERR1", PCRE2_ERROR_UTF16_ERR1 },
120 { "ERROR_UTF16_ERR2", PCRE2_ERROR_UTF16_ERR2 },
121 { "ERROR_UTF16_ERR3", PCRE2_ERROR_UTF16_ERR3 },
122 { "ERROR_UTF32_ERR1", PCRE2_ERROR_UTF32_ERR1 },
123 { "ERROR_UTF32_ERR2", PCRE2_ERROR_UTF32_ERR2 },
124 { "ERROR_BADDATA", PCRE2_ERROR_BADDATA },
125 { "ERROR_MIXEDTABLES", PCRE2_ERROR_MIXEDTABLES },
126 { "ERROR_BADMAGIC", PCRE2_ERROR_BADMAGIC },
127 { "ERROR_BADMODE", PCRE2_ERROR_BADMODE },
128 { "ERROR_BADOFFSET", PCRE2_ERROR_BADOFFSET },
129 { "ERROR_BADOPTION", PCRE2_ERROR_BADOPTION },
130 { "ERROR_BADREPLACEMENT", PCRE2_ERROR_BADREPLACEMENT },
131 { "ERROR_BADUTFOFFSET", PCRE2_ERROR_BADUTFOFFSET },
132 { "ERROR_CALLOUT", PCRE2_ERROR_CALLOUT },
133 { "ERROR_DFA_BADRESTART", PCRE2_ERROR_DFA_BADRESTART },
134 { "ERROR_DFA_RECURSE", PCRE2_ERROR_DFA_RECURSE },
135 { "ERROR_DFA_UCOND", PCRE2_ERROR_DFA_UCOND },
136 { "ERROR_DFA_UFUNC", PCRE2_ERROR_DFA_UFUNC },
137 { "ERROR_DFA_UITEM", PCRE2_ERROR_DFA_UITEM },
138 { "ERROR_DFA_WSSIZE", PCRE2_ERROR_DFA_WSSIZE },
139 { "ERROR_INTERNAL", PCRE2_ERROR_INTERNAL },
140 { "ERROR_JIT_BADOPTION", PCRE2_ERROR_JIT_BADOPTION },
141 { "ERROR_JIT_STACKLIMIT", PCRE2_ERROR_JIT_STACKLIMIT },
142 { "ERROR_MATCHLIMIT", PCRE2_ERROR_MATCHLIMIT },
143 { "ERROR_NOMEMORY", PCRE2_ERROR_NOMEMORY },
144 { "ERROR_NOSUBSTRING", PCRE2_ERROR_NOSUBSTRING },
145 { "ERROR_NOUNIQUESUBSTRING", PCRE2_ERROR_NOUNIQUESUBSTRING },
146 { "ERROR_NULL", PCRE2_ERROR_NULL },
147 { "ERROR_RECURSELOOP", PCRE2_ERROR_RECURSELOOP },
148 { "ERROR_RECURSIONLIMIT", PCRE2_ERROR_RECURSIONLIMIT },
149 { "ERROR_UNAVAILABLE", PCRE2_ERROR_UNAVAILABLE },
150 { "ERROR_UNSET", PCRE2_ERROR_UNSET },
151 { "ERROR_BADOFFSETLIMIT", PCRE2_ERROR_BADOFFSETLIMIT },
152 { "ERROR_BADREPESCAPE", PCRE2_ERROR_BADREPESCAPE },
153 { "ERROR_REPMISSINGBRACE", PCRE2_ERROR_REPMISSINGBRACE },
154 { "ERROR_BADSUBSTITUTION", PCRE2_ERROR_BADSUBSTITUTION },
155 { "ERROR_BADSUBSPATTERN", PCRE2_ERROR_BADSUBSPATTERN },
156 { "ERROR_TOOMANYREPLACE", PCRE2_ERROR_TOOMANYREPLACE },
157 #ifdef PCRE2_ERROR_BADSERIALIZEDDATA
158 { "ERROR_BADSERIALIZEDDATA", PCRE2_ERROR_BADSERIALIZEDDATA },
159 #endif
160 /*---------------------------------------------------------------------------*/
161 { NULL, 0 }
162 };
163
164 static flag_pair pcre2_config_flags[] = {
165 { "PCRE2_CONFIG_BSR", PCRE2_CONFIG_BSR },
166 { "PCRE2_CONFIG_JIT", PCRE2_CONFIG_JIT },
167 { "PCRE2_CONFIG_JITTARGET", PCRE2_CONFIG_JITTARGET },
168 { "PCRE2_CONFIG_LINKSIZE", PCRE2_CONFIG_LINKSIZE },
169 { "PCRE2_CONFIG_MATCHLIMIT", PCRE2_CONFIG_MATCHLIMIT },
170 { "PCRE2_CONFIG_NEWLINE", PCRE2_CONFIG_NEWLINE },
171 { "PCRE2_CONFIG_PARENSLIMIT", PCRE2_CONFIG_PARENSLIMIT },
172 { "PCRE2_CONFIG_RECURSIONLIMIT", PCRE2_CONFIG_RECURSIONLIMIT },
173 { "PCRE2_CONFIG_STACKRECURSE", PCRE2_CONFIG_STACKRECURSE },
174 { "PCRE2_CONFIG_UNICODE", PCRE2_CONFIG_UNICODE },
175 { "PCRE2_CONFIG_UNICODE_VERSION", PCRE2_CONFIG_UNICODE_VERSION },
176 { "PCRE2_CONFIG_VERSION", PCRE2_CONFIG_VERSION },
177 /*---------------------------------------------------------------------------*/
178 { NULL, 0 }
179 };
180
181 int Lpcre2_config (lua_State *L) {
182 flag_pair *fp;
183 if (lua_istable (L, 1))
184 lua_settop (L, 1);
185 else
186 lua_newtable (L);
187 for (fp = pcre2_config_flags; fp->key; ++fp) {
188 if (fp->val == PCRE2_CONFIG_JITTARGET) {
189 #if PCRE2_CODE_UNIT_WIDTH == 8
190 char buf[64];
191 if (PCRE2_ERROR_BADOPTION != pcre2_config (fp->val, buf)) {
192 lua_pushstring (L, buf);
193 lua_setfield (L, -2, fp->key);
194 }
195 #endif
196 }
197 else {
198 int val;
199 if (0 == pcre2_config (fp->val, &val)) {
200 lua_pushinteger (L, val);
201 lua_setfield (L, -2, fp->key);
202 }
203 }
204 }
205 return 1;
206 }
207
208 int Lpcre2_get_flags (lua_State *L) {
209 const flag_pair* fps[] = { pcre2_flags, pcre2_error_flags, NULL };
210 return get_flags (L, fps);
211 }
212
6060 lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n))
6161
6262 #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \
63 (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))
63 (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))
6464
6565 #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1)
6666 #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n))
6868 (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n))
6969
7070 #define ALG_BASE(st) (st)
71 #define ALG_GETCFLAGS(L,pos) luaL_optint(L, pos, ALG_CFLAGS_DFLT)
71 #define ALG_GETCFLAGS(L,pos) (int)luaL_optinteger(L, pos, ALG_CFLAGS_DFLT)
7272
7373 typedef struct {
7474 regex_t r;
262262 { "find", algf_find },
263263 { "gmatch", algf_gmatch },
264264 { "gsub", algf_gsub },
265 { "count", algf_count },
265266 { "split", algf_split },
266267 { "new", algf_new },
267268 { "flags", Posix_get_flags },
3737 lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n))
3838
3939 #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \
40 (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))
40 (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))
4141
4242 #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1)
4343 #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n))
4545 (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n))
4646
4747 #define ALG_BASE(st) (st)
48 #define ALG_GETCFLAGS(L,pos) luaL_optint(L, pos, ALG_CFLAGS_DFLT)
48 #define ALG_GETCFLAGS(L,pos) (int)luaL_optinteger(L, pos, ALG_CFLAGS_DFLT)
4949
5050 typedef struct {
5151 regex_t r;
8484 argE->text = luaL_checklstring (L, 2, &argE->textlen);
8585 checkarg_regaparams (L, 3, argP);
8686 argE->startoffset = get_startoffset (L, 4, argE->textlen);
87 argE->eflags = luaL_optint (L, 5, ALG_EFLAGS_DFLT);
87 argE->eflags = (int)luaL_optinteger (L, 5, ALG_EFLAGS_DFLT);
8888 }
8989
9090 static int generate_error (lua_State *L, const TPosix *ud, int errcode) {
330330 { "find", algf_find },
331331 { "gmatch", algf_gmatch },
332332 { "gsub", algf_gsub },
333 { "count", algf_count },
333334 { "match", algf_match },
334335 { "split", algf_split },
335336 { "config", Ltre_config },
4040 lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n))
4141
4242 #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \
43 (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))
43 (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))
4444
4545 #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, ((offs) + ALG_SUBBEG(ud,n))/ALG_CHARSIZE + 1)
4646 #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, ((offs) + ALG_SUBEND(ud,n))/ALG_CHARSIZE)
4848 (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n))
4949
5050 #define ALG_BASE(st) (st)
51 #define ALG_GETCFLAGS(L,pos) luaL_optint(L, pos, ALG_CFLAGS_DFLT)
51 #define ALG_GETCFLAGS(L,pos) (int)luaL_optinteger(L, pos, ALG_CFLAGS_DFLT)
5252
5353 typedef struct {
5454 regex_t r;
8787 argE->text = luaL_checklstring (L, 2, &argE->textlen);
8888 checkarg_regaparams (L, 3, argP);
8989 argE->startoffset = get_startoffset (L, 4, argE->textlen);
90 argE->eflags = luaL_optint (L, 5, ALG_EFLAGS_DFLT);
90 argE->eflags = (int)luaL_optinteger (L, 5, ALG_EFLAGS_DFLT);
9191 }
9292
9393 static int generate_error (lua_State *L, const TPosix *ud, int errcode) {
211211 { "wfind", algf_find },
212212 { "wgmatch", algf_gmatch },
213213 { "wgsub", algf_gsub },
214 { "wcount", algf_count },
214215 { "wmatch", algf_match },
215216 { "wsplit", algf_split },
216217 { NULL, NULL }
00 To test Lrexlib, execute the following command line:
11
2 lua ./runtest.lua [-v] LIBRARY...
2 lua ./runtest.lua [-a] [-v] LIBRARY...
33
4 -a use the external "Alien" library for "buffer subject" tests,
5 rather than the internal function
46 -v gives verbose output
44
55 local luatest = require "luatest"
66 local N = luatest.NT
7 local unpack = unpack or table.unpack
78
89 local function norm(a) return a==nil and N or a end
910
3132 --{ subj patt results }
3233 { {"ab", lib.new"."}, {{"a",N}, {"b",N} } },
3334 { {("abcd"):rep(3), "(.)b.(d)"}, {{"a","d"},{"a","d"},{"a","d"}} },
34 { {"abcd", ".*" }, {{"abcd",N},{"",N} } },--zero-length match
35 { {"abcd", ".*" }, {{"abcd",N} } },--zero-length match
3536 { {"abc", "^." }, {{"a",N}} },--anchored pattern
37 }
38 end
39
40 local function set_f_count (lib, flg)
41 return {
42 Name = "Function count",
43 Func = lib.count,
44 --{ subj patt results }
45 { {"ab", lib.new"."}, { 2 } },
46 { {("abcd"):rep(3), "(.)b.(d)"}, { 3 } },
47 { {"abcd", ".*" }, { 1 } },
48 { {"abc", "^." }, { 1 } },
3649 }
3750 end
3851
215228 --{ s, p, f, n, res1, res2, res3 },
216229 { {"a2c3", ".", "#" }, {"####", 4, 4} }, -- test .
217230 { {"a2c3", ".+", "#" }, {"#", 1, 1} }, -- test .+
218 { {"a2c3", ".*", "#" }, {"##", 2, 2} }, -- test .*
231 { {"a2c3", ".*", "#" }, {"#", 1, 1} }, -- test .*
219232 { {"/* */ */", "\\/\\*(.*)\\*\\/", "#" }, {"#", 1, 1} },
220233 { {"a2c3", "[0-9]", "#" }, {"a#c#", 2, 2} }, -- test %d
221234 { {"a2c3", "[^0-9]", "#" }, {"#2#3", 2, 2} }, -- test %D
307320 set_m_tfind (lib),
308321 set_m_find (lib),
309322 set_m_match (lib),
323 set_f_count (lib),
310324 set_f_gsub1 (lib),
311325 set_f_gsub2 (lib),
312326 set_f_gsub3 (lib),
11
22 local luatest = require "luatest"
33 local N = luatest.NT
4 local unpack = unpack or table.unpack
45
56 local function norm(a) return a==nil and N or a end
67
11
22 local luatest = require "luatest"
33 local N = luatest.NT
4 local unpack = unpack or table.unpack
45
56 local function norm(a) return a==nil and N or a end
67
6666 -- returns:
6767 -- 1) true, if success; false, if failure
6868 -- 2) test results table or error_message
69 local function test_function (test, func)
69 local function test_function (test, func, newmembuffer)
7070 local res
7171 local t = packNT (pcall (func, unpackNT (test[1])))
7272 if t[1] then
7373 table.remove (t, 1)
7474 res = t
75 if alien then
76 local subject = test[1][1]
77 local buf = alien.buffer (#subject)
78 if #subject > 0 then
79 alien.memmove (buf:topointer (), subject, #subject)
80 end
81 test[1][1] = buf
75 if newmembuffer then
76 test[1][1] = newmembuffer (test[1][1])
8277 local t = packNT (pcall (func, unpackNT (test[1])))
8378 if t[1] then
8479 table.remove (t, 1)
8580 res = t
8681 else
87 print "alien test failed"
82 print "buffer subjects test failed"
8883 res = t[2] --> error_message
8984 end
9085 end
119114 end
120115
121116 -- returns: a list of failed tests
122 local function test_set (set, lib)
117 local function test_set (set, lib, newmembuffer)
123118 local list = {}
124119
125120 if type (set.Func) == "function" then
126121 local func = set.Func
127122 for i,test in ipairs (set) do
128 local ok, res = test_function (test, func)
123 local ok, res = test_function (test, func, newmembuffer)
129124 if not ok then
130125 table.insert (list, {i=i, res})
131126 end
11
22 local luatest = require "luatest"
33 local N = luatest.NT
4 local unpack = unpack or table.unpack
45
56 local function norm(a) return a==nil and N or a end
67
5657
5758 local function set_f_gmatch (lib, flg)
5859 -- gmatch (s, p, [cf], [ef])
59 local pCSV = "(^[^,]*)|,([^,]*)"
60 local pCSV = "[^,]*"
6061 local F = false
6162 local function test_gmatch (subj, patt)
6263 local out, guard = {}, 10
7071 return {
7172 Name = "Function gmatch",
7273 Func = test_gmatch,
73 --{ subj patt results }
74 { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
75 { {"", pCSV}, {{"",F}} },
76 { {"12", pCSV}, {{"12",F}} },
77 ----{ {",", pCSV}, {{"", F},{F,""}} },
78 { {"12,,45", pCSV}, {{"12",F},{F,""},{F,"45"}} },
79 ----{ {",,12,45,,ab,", pCSV}, {{"",F},{F,""},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} },
74 --{ subj patt results }
75 { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
76 { {"", pCSV}, {{"",N}} },
77 { {"12", pCSV}, {{"12",N}} },
78 { {",", pCSV}, {{"", N},{"", N}} },
79 { {"12,,45", pCSV}, {{"12",N},{"",N},{"45",N}} },
80 { {",,12,45,,ab,", pCSV}, {{"",N},{"",N},{"12",N},{"45",N},{"",N},{"ab",N},{"",N}} },
81 { {"12345", "(.)(.)"}, {{"1","2"},{"3","4"}} },
82 { {"12345", "(.)(.?)"}, {{"1","2"},{"3","4"},{"5",""}} },
8083 }
8184 end
8285
9699 Func = test_split,
97100 --{ subj patt results }
98101 { {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj
99 { {"ab", "$"}, {{"ab","",N}, {"",N,N}, } },
100 { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } },
101 { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N},{"",N,N}, } },
102 { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } },
102 { {"ab", "$"}, {{"ab","",N}, {"",N,N} } },
103 { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N} } },
104 { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N}, {"",N,N} } },
105 { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N} } },
106 { {"ab", ".*" }, {{"","ab",N}, {"",N,N} } },
107 { {"ab", ".*?" }, {{"","",N}, {"a","",N}, {"b","",N}, {"",N,N} } },
108 { {"ab;de", ";*" }, {{"","",N},{"a","",N},{"b",";",N},{"d","",N},{"e","",N},{"",N,N} }},
109 }
110 end
111
112 local function set_f_internal_test (lib, flg)
113 return {
114 Name = "Function internal_test",
115 Func = lib.internal_test,
116 --{ params results }
117 { {""}, {true} },
103118 }
104119 end
105120
127142 }
128143 end
129144
145 local function set_m_capturecount (lib, flg)
146 return {
147 Name = "Method capturecount",
148 Method = "capturecount",
149 --{patt,cf,lo}, {subj,st,ef} { results }
150 { {"a"}, {}, { 0 } },
151 { {"(a)"}, {}, { 1 } },
152 { {"(a)(a)"}, {}, { 2 } },
153 { {"((a)a)"}, {}, { 2 } },
154 { {"((?i)a)(?:a)"}, {}, { 1 } },
155 }
156 end
157
130158 return function (libname)
131159 local lib = require (libname)
132160 local flags = lib.flags ()
133161 local sets = {
162 set_f_internal_test (lib, flags),
134163 set_f_match (lib, flags),
135164 set_f_find (lib, flags),
136165 set_f_gmatch (lib, flags),
137166 set_f_split (lib, flags),
138167 set_m_exec (lib, flags),
139168 set_m_tfind (lib, flags),
169 set_m_capturecount (lib, flags),
140170 }
141171 local MAJOR = tonumber(lib.version():match("%d+"))
142172 if MAJOR >= 0 then
11
22 local luatest = require "luatest"
33 local N = luatest.NT
4 local unpack = unpack or table.unpack
45
56 local function norm(a) return a==nil and N or a end
67
2627
2728 local function set_f_find (lib, flg)
2829 local cp1251 =
29 "������¨�������������������������‗אבגדהו¸זחטיךכלםמןנסעףפץצקרש��ת‎‏�"
30 "������¨�������������������������‗אבגדהו¸זחטיךכלםמןנסעףפץצקרשת��‎‏�"
3031 local loc = "Russian_Russia.1251"
3132 return {
3233 Name = "Function find",
5960
6061 local function set_f_gmatch (lib, flg)
6162 -- gmatch (s, p, [cf], [ef])
62 local pCSV = "(^[^,]*)|,([^,]*)"
63 local pCSV = "[^,]*"
6364 local F = false
6465 local function test_gmatch (subj, patt)
6566 local out, guard = {}, 10
7374 return {
7475 Name = "Function gmatch",
7576 Func = test_gmatch,
76 --{ subj patt results }
77 { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
78 { {"", pCSV}, {{"",F}} },
79 { {"12", pCSV}, {{"12",F}} },
80 { {",", pCSV}, {{"", F},{F,""}} },
81 { {"12,,45", pCSV}, {{"12",F},{F,""},{F,"45"}} },
82 { {",,12,45,,ab,", pCSV}, {{"",F},{F,""},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} },
77 --{ subj patt results }
78 { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
79 { {"", pCSV}, {{"",N}} },
80 { {"12", pCSV}, {{"12",N}} },
81 { {",", pCSV}, {{"", N},{"", N}} },
82 { {"12,,45", pCSV}, {{"12",N},{"",N},{"45",N}} },
83 { {",,12,45,,ab,", pCSV}, {{"",N},{"",N},{"12",N},{"45",N},{"",N},{"ab",N},{"",N}} },
84 { {"12345", "(.)(.)"}, {{"1","2"},{"3","4"}} },
85 { {"12345", "(.)(.?)"}, {{"1","2"},{"3","4"},{"5",""}} },
8386 }
8487 end
8588
99102 Func = test_split,
100103 --{ subj patt results }
101104 { {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj
102 { {"ab", "$"}, {{"ab","",N}, {"",N,N}, } },
103 { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } },
104 { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N},{"",N,N}, } },
105 { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } },
105 { {"ab", "$"}, {{"ab","",N}, {"",N,N} } },
106 { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N} } },
107 { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N}, {"",N,N} } },
108 { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N} } },
109 { {"ab", ".*" }, {{"","ab",N}, {"",N,N} } },
110 { {"ab", ".*?" }, {{"","",N}, {"a","",N}, {"b","",N}, {"",N,N} } },
111 { {"ab;de", ";*" }, {{"","",N},{"a","",N},{"b",";",N},{"d","",N},{"e","",N},{"",N,N} }},
106112 }
107113 end
108114
135141 end
136142
137143 local function set_m_dfa_exec (lib, flg)
144 local ver = tonumber(lib.version():match("%d+%.%d+"))
145 local NAP = ver < 8.34 and "" or "(*NO_AUTO_POSSESS)"
146 local flag_partial = ver < 10.0 and flg.PARTIAL or flg.PARTIAL_SOFT
138147 return {
139148 Name = "Method dfa_exec",
140149 Method = "dfa_exec",
141150 --{patt,cf,lo}, {subj,st,ef,os,ws} { results }
142 { {".+"}, {"abcd"}, {1,{4,3,2,1},4} }, -- [none]
143 { {".+"}, {"abcd",2}, {2,{4,3,2}, 3} }, -- positive st
144 { {".+"}, {"abcd",-2}, {3,{4,3}, 2} }, -- negative st
151 { {NAP..".+"}, {"abcd"}, {1,{4,3,2,1},4} }, -- [none]
152 { {NAP..".+"}, {"abcd",2}, {2,{4,3,2}, 3} }, -- positive st
153 { {NAP..".+"}, {"abcd",-2}, {3,{4,3}, 2} }, -- negative st
145154 { {".+"}, {"abcd",5}, {N } }, -- failing st
146 { {".*"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- [none]
155 { {NAP..".*"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- [none]
147156 { {".*?"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- non-greedy
148157 { {"aBC",flg.CASELESS}, {"abc"}, {1,{3},1} }, -- cf
149158 { {"aBC","i" }, {"abc"}, {1,{3},1} }, -- cf
152161 { {"bc"}, {"abc",N, flg.ANCHORED}, {N } }, -- ef
153162 { { "(.)b.(d)"}, {"abcd"}, {1,{4},1} }, --[captures]
154163 { {"abc"}, {"ab"}, {N } },
155 { {"abc"}, {"ab",N,flg.PARTIAL}, {1,{2},flg.ERROR_PARTIAL} },
156 { {".+"}, {string.rep("a",50),N,N,50,50}, {1, fill(50,26), 0}},-- small ovecsize
164 { {"abc"}, {"ab",N,flag_partial}, {1,{2},flg.ERROR_PARTIAL} },
165 { {NAP..".+"}, {string.rep("a",50),N,N,50,50}, {1, fill(50,26), 0}},-- small ovecsize
157166 }
167 end
168
-- Smoke test for the fullinfo method: compiles a pattern with two capture
-- groups and asserts that the reported CAPTURECOUNT is 2.
-- Returns nothing, so it contributes no test set to the caller's list;
-- the check runs as a side effect of being called.
local function set_m_fullinfo (lib, flg)
  local info = lib.new("(foo)(bar)"):fullinfo()
  assert(info.CAPTURECOUNT == 2)
end
159174
160175 return function (libname)
167182 set_f_split (lib, flags),
168183 set_m_exec (lib, flags),
169184 set_m_tfind (lib, flags),
185 set_m_fullinfo (lib, flags),
170186 }
171187 if flags.MAJOR >= 4 then
172188 table.insert (sets, set_named_subpatterns (lib, flags))
00 -- See Copyright Notice in the file LICENSE
11
22 local pat2pcre = require "pat2pcre"
3 local unpack = unpack or table.unpack
34
45 local function get_gsub (lib)
56 return lib.gsub or
2627 Func = get_gsub (lib),
2728 --{ s, p, f, n, res1, res2, res3 },
2829 { {"/* */ */", "%/%*(.*)%*%/", "#" }, {"#", 1, 1} },
29 { {"a2c3", ".-", "#" }, {"#########", 9, 9} }, -- test .-
30 { {"a2c3", ".-", "#" }, {"#a#2#c#3#", 5, 5} }, -- test .-
3031 { {"/**/", "%/%*(.-)%*%/", "#" }, {"#", 1, 1} },
3132 { {"/* */ */", "%/%*(.-)%*%/", "#" }, {"# */", 1, 1} },
3233 { {"a2c3", "%d", "#" }, {"a#c#", 2, 2} }, -- test %d
3637 { {"abcd", "\\b", "%1"}, {"abcd", 2, 2} },
3738 { {"", pCSV,fCSV}, {"[]", 1, 1} },
3839 { {"123", pCSV,fCSV}, {"[123]", 1, 1} },
39 { {",", pCSV,fCSV}, {"[][]", 2, 2} },
40 { {",", pCSV,fCSV}, {"[],", 1, 1} },
4041 { {"123,,456", pCSV,fCSV}, {"[123][][456]", 3, 3}},
41 { {",,123,456,,abc,789,", pCSV,fCSV}, {"[][][123][456][][abc][789][]", 8, 8}},
42 { {",,123,456,,abc,789,", pCSV,fCSV}, {"[],[123][456][][abc][789][]", 7, 7}},
4243 }
4344 -- convert patterns: lua -> pcre
4445 for _, test in ipairs (set) do
00 -- See Copyright Notice in the file LICENSE
1
2 -- See if we have alien, so we can do tests with buffer subjects
3 local ok
4 ok, alien = pcall (require, "alien")
5 if not ok then
6 io.stderr:write ("Warning: alien not found, so cannot run tests with buffer subjects\n")
7 alien = nil
8 end
91
102 do
113 local path = "./?.lua;"
157 end
168 local luatest = require "luatest"
179
-- Builds an alien buffer of #str bytes holding a copy of str.
-- The "alien" module is loaded lazily, so it is only required when this
-- function is actually called.
local function newalienbuffer (str)
  local alien = require "alien"
  local len = #str
  local buf = alien.buffer (len)
  -- Nothing to copy for an empty subject.
  if len ~= 0 then
    alien.memmove (buf:topointer (), str, len)
  end
  return buf
end
18
1819 -- returns: number of failures
19 local function test_library (libname, setfile, verbose)
20 local function test_library (libname, setfile, verbose, use_alien)
2021 if verbose then
2122 print (("[lib: %s; file: %s]"):format (libname, setfile))
2223 end
2425 local f = require (setfile)
2526 local sets = f (libname)
2627
27 local realalien = alien
28 if libname == "rex_posix" and not lib.flags ().STARTEND and alien then
29 alien = nil
30 io.stderr:write ("Cannot run posix tests with alien without REG_STARTEND\n")
28 local newmembuffer = use_alien and newalienbuffer or lib._newmembuffer
29 if newmembuffer then
30 if libname == "rex_posix" and not lib.flags ().STARTEND then
31 newmembuffer = nil
32 io.stderr:write ("Cannot run posix tests with buffer subjects without REG_STARTEND\n")
33 end
34 else
35 io.stderr:write ("Warning: cannot run tests with buffer subjects\n")
3136 end
3237
3338 local n = 0 -- number of failures
3540 if verbose then
3641 print (set.Name or "Unnamed set")
3742 end
38 local err = luatest.test_set (set, lib)
43 local err = luatest.test_set (set, lib, newmembuffer)
3944 if verbose then
4045 for _,v in ipairs (err) do
4146 print (" Test " .. v.i)
4752 if verbose then
4853 print ""
4954 end
50 alien = realalien
5155 return n
5256 end
5357
5660 gnu = { lib = "rex_gnu", "common_sets", "emacs_sets", "gnu_sets" },
5761 oniguruma = { lib = "rex_onig", "common_sets", "oniguruma_sets", },
5862 pcre = { lib = "rex_pcre", "common_sets", "pcre_sets", "pcre_sets2", },
63 pcre2 = { lib = "rex_pcre2", "common_sets", "pcre_sets", "pcre_sets2", },
5964 spencer = { lib = "rex_spencer", "common_sets", "posix_sets", "spencer_sets" },
6065 tre = { lib = "rex_tre", "common_sets", "posix_sets", "spencer_sets", --[["tre_sets"]] },
6166 }
6368 do
6469 local verbose, tests, nerr = false, {}, 0
6570 local dir
71 local use_alien
6672 -- check arguments
6773 for i = 1, select ("#", ...) do
6874 local arg = select (i, ...)
6975 if arg:sub(1,1) == "-" then
7076 if arg == "-v" then
7177 verbose = true
78 elseif arg == "-a" then
79 use_alien = true
7280 elseif arg:sub(1,2) == "-d" then
7381 dir = arg:sub(3)
82 else
83 error ("invalid argument: [" .. arg .. "]")
7484 end
7585 else
7686 if avail_tests[arg] then
98108 for _, test in ipairs (tests) do
99109 package.loaded[test.lib] = nil -- to force-reload the tested library
100110 for _, setfile in ipairs (test) do
101 nerr = nerr + test_library (test.lib, setfile, verbose)
111 nerr = nerr + test_library (test.lib, setfile, verbose, use_alien)
102112 end
103113 end
104114 print ("Total number of failures: " .. nerr)
11
22 local luatest = require "luatest"
33 local N = luatest.NT
4 local unpack = unpack or table.unpack
45
56 local function norm(a) return a==nil and N or a end
67
44
55 local luatest = require "luatest"
66 local N = luatest.NT
7 local unpack = unpack or table.unpack
78
89 local L = function(s) return (string.gsub(s, ".", "%0\0")) end
910
# Makefile for lrexlib
#
# Driver for the MinGW build: runs the requested goal in each per-library
# makefile (rex_*.mak) through a cmd.exe "for" loop.
# DIRBIT is not set here; it is taken from the command line or environment.
# Any value other than 64 selects the full list of libraries.

# Libraries built regardless of DIRBIT.
MKFILES = \
  rex_onig.mak \
  rex_pcre.mak \
  rex_pcre2.mak \
  rex_tre.mak

# The GNU and Spencer bindings are left out when DIRBIT is 64.
ifneq ($(DIRBIT),64)
MKFILES += \
  rex_gnu.mak \
  rex_spencer.mak
endif

# Sorting restores the alphabetical processing order of the explicit lists.
MKFILES := $(sort $(MKFILES))

# One sub-make per library makefile; the goal name is appended by each recipe.
LOOP = @for %%d in ($(MKFILES)) do $(MAKE) -f %%d

.PHONY: all build test install clean

all: build test

# $(MAKE) is only reached through $(LOOP), so make does not recognise these
# lines as recursive invocations by itself.  The leading '+' marks them as
# such, which keeps "make -n" and the -j jobserver working in the sub-makes.
build:
	+$(LOOP)

# Tests need the built libraries; the explicit prerequisite keeps
# "make -j all" from starting the tests before the build has finished.
test: build
	+$(LOOP) test

install:
	+$(LOOP) install

clean:
	del *.o *.def *.dll
# Use with GNU Make.
#
# Shared MinGW build rules, included by the per-library rex_*.mak files.
# The including makefile must define PROJECT, OBJ, PROJDIR and TESTNAME
# before the include, and may define MYINCS, MYLIBS and MYCFLAGS.

# Remove a half-written target (.def, .dll, .a) when its recipe fails, so a
# broken file is never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# Lrexlib version
VERSION = 2.9.1

# User Settings ------------------------------------------------------------

# Target Lua version (51 for Lua 5.1, etc.)
LUAVERSION = 51
# Dotted form of LUAVERSION (51 -> 5.1).  Only the leading 5 gets a dot, so
# a value such as 55 becomes 5.5; replacing every 5 would give "5.5.".
LUADOTVERSION = $(patsubst 5%,5.%,$(LUAVERSION))

# Target bitness: 32 or 64
DIRBIT = 32
# GCC location (GCC32 and GCC64 are defined environment variables)
PATH = $(GCC$(DIRBIT))

# INSTALLPATH : Path to install the built DLL.
# LUADLL      : Lua DLL to link to (.dll should be omitted).
# LUAEXE      : Lua interpreter.
# LUAINC      : Path of Lua include files.
# LIBPATH     : Path of lua51.dll, lua52.dll, pcre.dll, etc.

INSTALLPATH = S:\Progr\Exe\lib$(DIRBIT)\lua\$(LUADOTVERSION)
LUADLL = lua$(LUAVERSION)
LUAINC = $(PATH_SYSTEM)\include\lua\$(LUADOTVERSION)
LIBPATH = $(CROOT)\Programs\EXE$(DIRBIT)

# For Lua 5.1 the interpreter is lua.exe and REX_CREATEGLOBALVAR is defined;
# other versions use a versioned interpreter name and no extra define.
ifeq ($(LUAVERSION),51)
  LUAEXE = $(LIBPATH)\lua.exe
  CREATEGLOBAL = -DREX_CREATEGLOBALVAR
else
  LUAEXE = $(LIBPATH)\lua$(LUAVERSION).exe
endif
# --------------------------------------------------------------------------

# Build product and its installed location.
BIN = $(PROJECT).dll
BININSTALL = $(INSTALLPATH)\$(BIN)

# Tools.
CC = gcc
AR = ar rcu
RANLIB = ranlib

# Compiler flags.  These stay recursively expanded because INCS is only
# defined further down.
CFLAGS = -W -Wall -O2 $(INCS) -DREX_OPENLIB=luaopen_$(PROJECT) \
  -DREX_LIBNAME=\"$(PROJECT)\" -DVERSION=\"$(VERSION)\" \
  -m$(DIRBIT) $(CREATEGLOBAL) $(MYCFLAGS)

# Module-definition file listing the symbols exported from the DLL.
DEFFILE = $(PROJECT).def
EXPORTED = luaopen_$(PROJECT)

INCS = -I$(LUAINC) $(MYINCS)
LIBS = -l$(LUADLL) -m$(DIRBIT) -s $(MYLIBS)
SRCPATH = ..\..\src
TESTPATH = ..\..\test

.PHONY: all install test vtest clean

# Sources live in the common src directory and in the per-library subdirectory.
vpath %.c $(SRCPATH);$(SRCPATH)\$(PROJDIR)
vpath %.h $(SRCPATH);$(SRCPATH)\$(PROJDIR)

all: $(BIN)

clean:
	del $(OBJ) $(BIN) $(DEFFILE)

install: $(BININSTALL)

# The test runner is pointed at the current directory (-d$(CURDIR)), i.e. at
# the DLL built here, so the DLL is a prerequisite of both test goals.
test: $(BIN)
	cd $(TESTPATH) && $(LUAEXE) runtest.lua $(TESTNAME) -d$(CURDIR)

# Same as "test", with verbose output (-v).
vtest: $(BIN)
	cd $(TESTPATH) && $(LUAEXE) runtest.lua -v $(TESTNAME) -d$(CURDIR)

$(BIN): $(OBJ) $(DEFFILE)
	$(CC) $(DEFFILE) $(OBJ) -L$(LIBPATH) $(LIBS) -o $@ -shared

# Optional static library; not part of "all".
lib$(PROJECT)$(LUAVERSION).a: $(OBJ)
	$(AR) $@ $?
	$(RANLIB) $@

# Write an EXPORTS header followed by one line per exported symbol.
$(DEFFILE):
	echo EXPORTS > $@
	for %%d in ($(EXPORTED)) do echo %%d>> $@

$(BININSTALL): $(BIN)
	copy /Y $< $@
# Documentation Makefile
#
# Windows front end: forwards the "all" and "clean" goals to the makefile in
# the doc directory, passing Windows replacements for its tool variables.

.PHONY: all clean

# Values passed down to the doc makefile.
IDX = ..\README.rst
RM = del
CP = "copy /y"
APP = rst2html.py

# Command-line assignments handed to the sub-make.
ALLVAR = APP=$(APP) CP=$(CP) RM=$(RM) IDX=$(IDX)

# $@ passes on whichever goal was requested.
all clean:
	cd ..\..\doc && $(MAKE) $(ALLVAR) $@
# Project: rex_gnu
#
# Per-library settings for the GNU regex binding.  The build rules themselves
# come from _mingw.mak, which reads the variables set here.

PROJECT = rex_gnu
PROJDIR = gnu
TESTNAME = gnu
OBJ = lgnu.o common.o

# User Settings ------------------------------------------------------------
# path of GNU include files
REGEXINC = $(PATH_WORK)\system\include\gnuregex
# --------------------------------------------------------------------------

MYINCS = -I$(REGEXINC)
MYLIBS = -lregex2

include _mingw.mak

# Header dependencies of the object files.
lgnu.o common.o: common.h
lgnu.o: algo.h
# Project: rex_onig
#
# Per-library settings for the Oniguruma binding.  The build rules themselves
# come from _mingw.mak, which reads the variables set here.

PROJECT = rex_onig
PROJDIR = oniguruma
TESTNAME = oniguruma
OBJ = lonig.o lonig_f.o common.o

# User Settings ------------------------------------------------------------
# path of Oniguruma include files
REGEXINC = $(PATH_WORK)\system\include\oniguruma
# --------------------------------------------------------------------------

MYINCS = -I$(REGEXINC)
MYLIBS = -lonig -Wl,--enable-auto-import

include _mingw.mak

# Header dependencies of the object files.
lonig.o lonig_f.o common.o: common.h
lonig.o: algo.h
# Project: rex_pcre
#
# Per-library settings for the PCRE binding.  The build rules themselves come
# from _mingw.mak, which reads the variables set here.

PROJECT = rex_pcre
PROJDIR = pcre
TESTNAME = pcre
OBJ = lpcre.o lpcre_f.o common.o

# User Settings ------------------------------------------------------------
# path of PCRE include files
REGEXINC = $(PATH_WORK)\system\include\pcre
# --------------------------------------------------------------------------

MYINCS = -I$(REGEXINC)
MYLIBS = -lpcre

include _mingw.mak

# Header dependencies of the object files.
lpcre.o lpcre_f.o common.o: common.h
lpcre.o: algo.h
# Project: rex_pcre2
#
# Per-library settings for the PCRE2 binding.  The build rules themselves
# come from _mingw.mak, which reads the variables set here.

PROJECT = rex_pcre2
PROJDIR = pcre2
TESTNAME = pcre2
OBJ = lpcre2.o lpcre2_f.o common.o

# User Settings ------------------------------------------------------------
# path of PCRE2 include files
REGEXINC = $(PATH_WORK)\system\include\pcre2
# --------------------------------------------------------------------------

MYINCS = -I$(REGEXINC)
# Compile against the 8-bit code unit flavour of the PCRE2 API.
MYCFLAGS = -DPCRE2_CODE_UNIT_WIDTH=8
# NOTE(review): stock PCRE2 builds usually name the 8-bit library pcre2-8;
# -lpcre2 presumably matches a locally named import library -- confirm.
MYLIBS = -lpcre2

include _mingw.mak

# Header dependencies of the object files.
lpcre2.o lpcre2_f.o common.o: common.h
lpcre2.o: algo.h
# Project: rex_spencer
#
# Per-library settings for the binding to Spencer's regex library.  It is
# built from the posix sources but tested under the name "spencer".  The
# build rules themselves come from _mingw.mak.

PROJECT = rex_spencer
PROJDIR = posix
TESTNAME = spencer
OBJ = lposix.o common.o

# User Settings ------------------------------------------------------------
# path of Spencer's include files
REGEXINC = $(PATH_WORK)\system\include\rxspencer
# --------------------------------------------------------------------------

MYINCS = -I$(REGEXINC)
MYLIBS = -lrxspencer

include _mingw.mak

# Header dependencies of the object files.
lposix.o common.o: common.h
lposix.o: algo.h
# Project: rex_tre
#
# Per-library settings for the TRE binding.  The build rules themselves come
# from _mingw.mak, which reads the variables set here.

PROJECT = rex_tre
PROJDIR = tre
TESTNAME = tre
OBJ = ltre.o common.o

# User Settings ------------------------------------------------------------
# path of TRE include files
REGEXINC = $(PATH_WORK)\system\include
# --------------------------------------------------------------------------

MYINCS = -I$(REGEXINC)
MYLIBS = -ltre

# Uncomment the following line to add wide-character functions (in alpha state).
# ADDWIDECHARFUNCS = 1
# When enabled, one more object is built and the matching define is passed.
ifdef ADDWIDECHARFUNCS
  MYCFLAGS += -DREX_ADDWIDECHARFUNCS
  OBJ += ltre_w.o
endif

include _mingw.mak

# Header dependencies of the object files.  The ltre_w.o entry is harmless
# when the wide-character object is not part of OBJ.
ltre.o ltre_w.o common.o: common.h
ltre.o ltre_w.o: algo.h