Update upstream source from tag 'upstream/2.9.1'
Update to upstream version '2.9.1'
with Debian dir e460ddf9b9596389929f06f1b6ffe02f076ae888
Sergei Golovan
2 years ago
0 | 0 | License of Lrexlib release |
1 | 1 | -------------------------- |
2 | 2 | |
3 | Copyright (C) Reuben Thomas 2000-2012 | |
4 | Copyright (C) Shmuel Zeigerman 2004-2012 | |
3 | Copyright (C) Reuben Thomas 2000-2020 | |
4 | Copyright (C) Shmuel Zeigerman 2004-2020 | |
5 | 5 | |
6 | 6 | Permission is hereby granted, free of charge, to any person |
7 | 7 | obtaining a copy of this software and associated |
0 | 0 | # Makefile for lrexlib |
1 | 1 | |
2 | VERSION = 2.7.2 | |
2 | VERSION = 2.9.1 | |
3 | 3 | PROJECT = lrexlib |
4 | 4 | PROJECT_VERSIONED = $(PROJECT)-$(VERSION) |
5 | 5 | |
7 | 7 | LUA = lua |
8 | 8 | LUAROCKS = luarocks |
9 | 9 | CP = cp -a |
10 | RM = rm | |
10 | RM = rm -f | |
11 | 11 | RST2HTML = rst2html |
12 | REGNAMES = gnu pcre posix oniguruma tre | |
12 | REGNAMES = gnu pcre pcre2 posix oniguruma tre | |
13 | 13 | LUAROCKS_COMMAND = make |
14 | 14 | |
15 | 15 | |
30 | 30 | |
31 | 31 | rockspecs: |
32 | 32 | rm -f *.rockspec |
33 | $(LUA) mkrockspecs.lua $(VERSION) | |
33 | $(LUA) mkrockspecs.lua $(PROJECT) $(VERSION) | |
34 | 34 | |
35 | 35 | doc/index.txt: README.rst |
36 | 36 | $(CP) $< $@ |
37 | 37 | |
38 | 38 | check: build |
39 | 39 | for i in $(REGNAMES); do \ |
40 | LUA_PATH="test/?.lua;$(LUA_PATH)" $(LUA) test/runtest.lua -dsrc/$$i $$i; \ | |
40 | LUA_PATH="test/?.lua;$(LUA_PATH);" $(LUA) test/runtest.lua -dsrc/$$i $$i; \ | |
41 | 41 | done |
42 | 42 | |
43 | 43 | clean: |
49 | 49 | git tag -a -m "Release tag" rel-`echo $(VERSION) | sed -e 's/\./-/g'` && \ |
50 | 50 | git push && git push --tags && \ |
51 | 51 | $(MAKE) build LUAROCKS_COMMAND=build && \ |
52 | woger lua package=$(PROJECT) package_name=$(PROJECT) version=$(VERSION) description="Lua binding for regex libraries" notes=release-notes home="https://github.com/rrthomas/$(PROJECT)" | |
52 | woger lua package=$(PROJECT) package_name=$(PROJECT) version=$(VERSION) description="Lua binding for regex libraries" notes=release-notes home="`$(LUA) -e'version="'$(VERSION)'"; flavour="none"; t = require "rockspecs"; print(t.default.description.homepage)'`" | |
53 | 53 | rm -f release-notes |
0 | 2020-08-07 Release 2.9.1 | |
1 | ||
2 | * Add Lua 5.4 support | |
3 | ||
4 | 2017-11-07 Release 2.9.0 | |
5 | ||
6 | * Add PCRE2 support. | |
7 | ||
8 | 2015-02-26 Release 2.8.0 | |
9 | ||
10 | * Add Lua 5.3 support | |
11 | * No longer return empty matches adjacent to previous non-empty match. | |
12 | ||
0 | 13 | 2013-01-08 Release 2.7.2 |
1 | 14 | |
2 | 15 | * Fixed the use of alternative allocators, and a memory leak. |
4 | 4 | | and Shmuel Zeigerman (shmuz@013net.net) |
5 | 5 | |
6 | 6 | **Lrexlib** provides bindings of six regular expression library APIs |
7 | (POSIX_, PCRE_, GNU_, TRE_ and Oniguruma_) to Lua_ 5.1 and Lua 5.2. | |
7 | (POSIX_, PCRE_, PCRE2_, GNU_, TRE_ and Oniguruma_) to Lua_ >= 5.1. | |
8 | 8 | The bindings for TRE and Oniguruma are not currently complete. |
9 | 9 | |
10 | **Lrexlib** is copyright Reuben Thomas 2000-2012 and copyright Shmuel | |
11 | Zeigerman 2004-2012, and is released under the same license as Lua, | |
10 | **Lrexlib** is copyright Reuben Thomas 2000-2020 and copyright Shmuel | |
11 | Zeigerman 2004-2020, and is released under the same license as Lua, | |
12 | 12 | the MIT_ license (otherwise known as the revised BSD license). There |
13 | 13 | is no warranty. |
14 | 14 | |
15 | 15 | .. _POSIX: http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html |
16 | 16 | .. _PCRE: http://www.pcre.org/pcre.txt |
17 | .. _PCRE2: http://www.pcre.org/pcre2.txt | |
17 | 18 | .. _GNU: ftp://ftp.gnu.org/old-gnu/regex/ |
18 | .. _Oniguruma: http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt | |
19 | .. _Oniguruma: https://github.com/kkos/oniguruma | |
19 | 20 | .. _TRE: http://laurikari.net/tre/documentation/ |
20 | 21 | .. _Lua: http://www.lua.org |
21 | 22 | .. _MIT: http://www.opensource.org/licenses/mit-license.php |
35 | 36 | |
36 | 37 | luarocks install lrexlib-FLAVOUR |
37 | 38 | |
38 | where **FLAVOUR** is one of PCRE, POSIX, oniguruma, TRE, GNU | |
39 | where **FLAVOUR** is one of PCRE, PCRE2, POSIX, oniguruma, TRE, GNU | |
39 | 40 | |
40 | 41 | .. _LuaRocks: http://www.luarocks.org |
41 | 42 |
5 | 5 | <body> |
6 | 6 | |
7 | 7 | <h2>Lrexlib</h2> |
8 | <p>Copyright © Reuben Thomas 2000-2012<br> | |
9 | Copyright © Shmuel Zeigerman 2004-2012 | |
8 | <p>Copyright © Reuben Thomas 2000-2020<br> | |
9 | Copyright © Shmuel Zeigerman 2004-2020 | |
10 | 10 | |
11 | 11 | <p>Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: |
12 | 12 |
7 | 7 | ------------------------------------------------------------ |
8 | 8 | |
9 | 9 | **Lrexlib** builds into shared libraries called by default *rex_posix.so*, |
10 | *rex_pcre.so*, *rex_gnu.so*, *rex_tre.so* and *rex_onig.so*, which can be used with | |
11 | *require*. | |
10 | *rex_pcre.so*, *rex_pcre2.so*, *rex_gnu.so*, *rex_tre.so* and *rex_onig.so*, | |
11 | which can be used with *require*. | |
12 | 12 | |
13 | 13 | ------------------------------------------------------------ |
14 | 14 | |
27 | 27 | MyFunc (arg1, arg2, [arg3], [arg4]) |
28 | 28 | |
29 | 29 | 3. Throughout this document (unless it causes ambiguity), the identifier **rex** |
30 | is used in place of either *rex_posix*, *rex_pcre*, *rex_gnu*, *rex_onig* or | |
31 | *rex_tre*, which are the default namespaces for the corresponding libraries. | |
30 | is used in place of either *rex_posix*, *rex_pcre*, *rex_pcre2*, *rex_gnu*, | |
31 | *rex_onig* or *rex_tre*, which are the default namespaces for the corresponding | |
32 | libraries. | |
32 | 33 | |
33 | 34 | 4. All functions that take a regular expression pattern as an argument will |
34 | 35 | generate an error if that pattern is found invalid by the regex library. |
37 | 38 | too. In this case, the cf_ and larg_ arguments are ignored (should |
38 | 39 | be either supplied as nils or omitted). |
39 | 40 | |
40 | 6. All functions that take a string-type subject accept a table (in Lua >= 5.2) | |
41 | or userdata that has a ``topointer`` method and ``__len`` metamethod, and | |
42 | take the subject to be a block of memory starting at the address returned by | |
41 | 6. All functions that take a string-type subject accept a table or userdata that | |
42 | has a ``topointer`` method and ``__len`` metamethod, and take the subject to | |
43 | be a block of memory starting at the address returned by | |
43 | 44 | ``subject:topointer()`` and of length ``#subject``. This works with buffer |
44 | 45 | objects from the alien library (https://github.com/mascarenhas/alien). Note |
45 | 46 | that special attention is needed with POSIX regex libraries that do not |
46 | 47 | support ``REG_STARTEND``, and hence need NUL-terminated subjects: the NUL is |
47 | not included in the string length, so alien buffers must be wrapped to | |
48 | report a length that excludes the NUL. | |
48 | not included in the string length, so alien buffers must be wrapped to report | |
49 | a length that excludes the NUL. | |
49 | 50 | |
50 | 51 | .. _cf: |
51 | 52 | |
52 | 53 | 7. The default value for *compilation flags* (*cf*) that Lrexlib uses when |
53 | 54 | the parameter is not supplied or ``nil`` is: |
54 | 55 | |
55 | * REG_EXTENDED for POSIX and TRE | |
56 | * 0 for PCRE | |
57 | * ONIG_OPTION_NONE for Oniguruma | |
58 | * SYNTAX_POSIX_EXTENDED for GNU | |
59 | ||
60 | **PCRE**, **Oniguruma**: *cf* may also be supplied as a string, whose | |
61 | characters stand for compilation flags. Combinations of the following | |
56 | * ``REG_EXTENDED`` for POSIX and TRE | |
57 | * ``0`` for PCRE and PCRE2 | |
58 | * ``ONIG_OPTION_NONE`` for Oniguruma | |
59 | * ``SYNTAX_POSIX_EXTENDED`` for GNU | |
60 | ||
61 | **PCRE**, **PCRE2**, **Oniguruma**: *cf* may also be supplied as a string, | |
62 | whose characters stand for compilation flags. Combinations of the following | |
62 | 63 | characters (case sensitive) are supported: |
63 | 64 | |
64 | =============== ================== ============================== | |
65 | **Character** **PCRE flag** **Oniguruma flag** | |
66 | =============== ================== ============================== | |
67 | **i** PCRE_CASELESS ONIG_OPTION_IGNORECASE | |
68 | **m** PCRE_MULTILINE ONIG_OPTION_NEGATE_SINGLELINE | |
69 | **s** PCRE_DOTALL ONIG_OPTION_MULTILINE | |
70 | **x** PCRE_EXTENDED ONIG_OPTION_EXTEND | |
71 | **U** PCRE_UNGREEDY n/a | |
72 | **X** PCRE_EXTRA n/a | |
73 | =============== ================== ============================== | |
65 | =============== ================== ================== ============================== | |
66 | **Character** **PCRE flag** **PCRE2 flag** **Oniguruma flag** | |
67 | =============== ================== ================== ============================== | |
68 | **i** PCRE_CASELESS PCRE2_CASELESS ONIG_OPTION_IGNORECASE | |
69 | **m** PCRE_MULTILINE PCRE2_MULTILINE ONIG_OPTION_NEGATE_SINGLELINE | |
70 | **s** PCRE_DOTALL PCRE2_DOTALL ONIG_OPTION_MULTILINE | |
71 | **x** PCRE_EXTENDED PCRE2_EXTENDED ONIG_OPTION_EXTEND | |
72 | **U** PCRE_UNGREEDY PCRE2_UNGREEDY n/a | |
73 | **X** PCRE_EXTRA n/a n/a | |
74 | =============== ================== ================== ============================== | |
74 | 75 | |
75 | 76 | .. _ef: |
76 | 77 | |
77 | 78 | 8. The default value for *execution flags* (*ef*) that Lrexlib uses when |
78 | 79 | the parameter is not supplied or ``nil``, is: |
79 | 80 | |
80 | * 0 for standard POSIX regex library | |
81 | * REG_STARTEND for those POSIX regex libraries that support it, | |
82 | e.g. Spencer's. | |
83 | * 0 for PCRE, Oniguruma and TRE | |
81 | * ``0`` for standard POSIX regex library | |
82 | * ``REG_STARTEND`` for those POSIX regex libraries that support it, e.g. Spencer's | |
83 | * ``0`` for PCRE, PCRE2, Oniguruma and TRE | |
84 | 84 | |
85 | 85 | .. _larg: |
86 | 86 | |
87 | 87 | 9. The notation *larg...* is used to indicate optional library-specific |
88 | 88 | arguments, which are documented in the ``new`` method of each library. |
89 | ||
90 | 10. In the functions searching for multiple matches (``gmatch``, ``gsub``, | |
91 | ``split``, ``count``) every empty match adjacent to the previous match | |
92 | is discarded, e.g. ``rex.count("abc",".*")`` will return 1. | |
89 | 93 | |
90 | 94 | ------------------------------------------------------------ |
91 | 95 | |
222 | 226 | *subj* and replaces them according to the parameters *repl* and *n* (see details |
223 | 227 | below). |
224 | 228 | |
225 | +---------+-----------------------------------+-------------------------+-------------+ | |
226 | |Parameter| Description | Type |Default Value| | |
227 | +=========+===================================+=========================+=============+ | |
228 | | subj |subject | string | n/a | | |
229 | +---------+-----------------------------------+-------------------------+-------------+ | |
230 | | patt |regular expression pattern |string or userdata | n/a | | |
231 | +---------+-----------------------------------+-------------------------+-------------+ | |
232 | | repl |substitution source |string, function, table, | n/a | | |
233 | | | |``false`` or ``nil`` | | | |
234 | +---------+-----------------------------------+-------------------------+-------------+ | |
235 | | [n] |maximum number of matches to search| number or function | ``nil`` | | |
236 | | |for, or control function, or nil | | | | |
237 | +---------+-----------------------------------+-------------------------+-------------+ | |
238 | | [cf] |compilation flags (bitwise OR) | number | cf_ | | |
239 | +---------+-----------------------------------+-------------------------+-------------+ | |
240 | | [ef] |execution flags (bitwise OR) | number | ef_ | | |
241 | +---------+-----------------------------------+-------------------------+-------------+ | |
242 | |[larg...]|library-specific arguments | | | | |
243 | +---------+-----------------------------------+-------------------------+-------------+ | |
229 | +---------+-----------------------------------+--------------------------+-------------+ | |
230 | |Parameter| Description | Type |Default Value| | |
231 | +=========+===================================+==========================+=============+ | |
232 | | subj |subject | string | n/a | | |
233 | +---------+-----------------------------------+--------------------------+-------------+ | |
234 | | patt |regular expression pattern |string or userdata | n/a | | |
235 | +---------+-----------------------------------+--------------------------+-------------+ | |
236 | | repl |substitution source |string, function or table | n/a | | |
237 | +---------+-----------------------------------+--------------------------+-------------+ | |
238 | | [n] |maximum number of matches to search| number or function | ``nil`` | | |
239 | | |for, or control function, or nil | | | | |
240 | +---------+-----------------------------------+--------------------------+-------------+ | |
241 | | [cf] |compilation flags (bitwise OR) | number | cf_ | | |
242 | +---------+-----------------------------------+--------------------------+-------------+ | |
243 | | [ef] |execution flags (bitwise OR) | number | ef_ | | |
244 | +---------+-----------------------------------+--------------------------+-------------+ | |
245 | |[larg...]|library-specific arguments | | | | |
246 | +---------+-----------------------------------+--------------------------+-------------+ | |
244 | 247 | |
245 | 248 | **Returns:** |
246 | 249 | 1. The subject string with the substitutions made. |
248 | 251 | 3. Number of substitutions made. |
249 | 252 | |
250 | 253 | **Details:** |
251 | The parameter *repl* can be either a string, a function, a table, | |
252 | ``false`` or ``nil``. On each match made, it is converted into a | |
253 | value *repl_out* that may be used for the replacement. | |
254 | The parameter *repl* can be either a string, a function or a table. | |
255 | On each match made, it is converted into a value *repl_out* that may be used | |
256 | for the replacement. | |
254 | 257 | |
255 | 258 | *repl_out* is generated differently depending on the type of *repl*: |
256 | 259 | |
289 | 292 | same rules as for the return value of *repl* call, described in the above |
290 | 293 | paragraph. |
291 | 294 | |
292 | 4. If *repl* is ``false`` or ``nil``, no replacement is done. Note | |
293 | that, unusually for Lua, if ``repl`` is absent, it is not taken | |
294 | to be ``nil``. This is to prevent programming errors caused by | |
295 | inadvertently missing out *repl*. | |
296 | ||
297 | 295 | Note: Under some circumstances, the value of *repl_out* may be ignored; see |
298 | 296 | below_. |
299 | 297 | |
377 | 375 | |
378 | 376 | ------------------------------------------------------------ |
379 | 377 | |
378 | count | |
379 | ----- | |
380 | ||
381 | :funcdef:`rex.count (subj, patt, [cf], [ef], [larg...])` | |
382 | ||
383 | This function counts matches of the pattern *patt* in the string *subj*. | |
384 | ||
385 | +---------+-----------------------------------+--------------------------+-------------+ | |
386 | |Parameter| Description | Type |Default Value| | |
387 | +=========+===================================+==========================+=============+ | |
388 | | subj |subject | string | n/a | | |
389 | +---------+-----------------------------------+--------------------------+-------------+ | |
390 | | patt |regular expression pattern |string or userdata | n/a | | |
391 | +---------+-----------------------------------+--------------------------+-------------+ | |
392 | | [cf] |compilation flags (bitwise OR) | number | cf_ | | |
393 | +---------+-----------------------------------+--------------------------+-------------+ | |
394 | | [ef] |execution flags (bitwise OR) | number | ef_ | | |
395 | +---------+-----------------------------------+--------------------------+-------------+ | |
396 | |[larg...]|library-specific arguments | | | | |
397 | +---------+-----------------------------------+--------------------------+-------------+ | |
398 | ||
399 | **Returns:** | |
400 | 1. Number of matches found. | |
401 | ||
402 | ------------------------------------------------------------ | |
403 | ||
380 | 404 | flags |
381 | 405 | ----- |
382 | 406 | |
407 | 431 | constants in the used library. They are formed as follows: |
408 | 432 | |
409 | 433 | * **POSIX**, **TRE**: prefix REG\_ is omitted, e.g. REG_ICASE becomes ``"ICASE"``. |
410 | * **PCRE:** prefix PCRE\_ is omitted, e.g. PCRE_CASELESS becomes | |
411 | ``"CASELESS"``. | |
434 | * **PCRE:** prefix PCRE\_ is omitted, e.g. PCRE_CASELESS becomes ``"CASELESS"``. | |
435 | * **PCRE2:** prefix PCRE2\_ is omitted, e.g. PCRE2_CASELESS becomes ``"CASELESS"``. | |
412 | 436 | * **Oniguruma:** names of constants are converted to strings with no alteration, |
413 | 437 | but for ONIG_OPTION_xxx constants, alias strings are created additionally, |
414 | 438 | e.g., the value of ONIG_OPTION_IGNORECASE constant becomes accessible via |
479 | 503 | result, in a table. This table contains ``false`` in the positions where the |
480 | 504 | corresponding sub-pattern did not participate in the match. |
481 | 505 | |
482 | 1. **PCRE**, **Oniguruma**: if *named subpatterns* are used then the table | |
483 | also contains substring matches keyed by their correspondent subpattern | |
484 | names (strings). | |
506 | 1. **PCRE**, **PCRE2**, **Oniguruma**: if *named subpatterns* are used then | |
507 | the table also contains substring matches keyed by their correspondent | |
508 | subpattern names (strings). | |
485 | 509 | |
486 | 510 | **Returns on failure:** |
487 | 511 | 1. ``nil`` |
517 | 541 | positions where the corresponding sub-pattern did not participate in the |
518 | 542 | match. |
519 | 543 | |
520 | 1. **PCRE**, **Oniguruma**: if *named subpatterns* are used then the table | |
521 | also contains substring matches keyed by their correspondent subpattern | |
522 | names (strings). | |
544 | 1. **PCRE**, **PCRE2**, **Oniguruma**: if *named subpatterns* are used then | |
545 | the table also contains substring matches keyed by their correspondent | |
546 | subpattern names (strings). | |
523 | 547 | |
524 | 548 | **Returns on failure:** |
525 | 549 | 1. ``nil`` |
540 | 564 | :funcdef:`rex.new (patt, [cf], [lo])` |
541 | 565 | |
542 | 566 | The locale (*lo*) can be either a string (e.g., "French_France.1252"), or a |
543 | userdata obtained from a call to maketables_. The default value, used when the | |
544 | parameter is not supplied or ``nil``, is the built-in PCRE set of character | |
567 | userdata obtained from a call to maketables__. The default value, used when | |
568 | the parameter is not supplied or ``nil``, is the built-in PCRE set of character | |
545 | 569 | tables. |
570 | ||
571 | __ maketables_pcre_ | |
572 | ||
573 | ------------------------------------------------------------ | |
574 | ||
575 | fullinfo | |
576 | -------- | |
577 | ||
578 | [See *pcre_fullinfo* in the PCRE docs.] | |
579 | ||
580 | :funcdef:`r:fullinfo ()` | |
581 | ||
582 | This function returns a table containing information about the compiled pattern. | |
583 | The keys are strings formed in the following way: | |
584 | ``PCRE_INFO_CAPTURECOUNT`` -> ``"CAPTURECOUNT"``. The values are numbers. | |
585 | ||
586 | ------------------------------------------------------------ | |
587 | ||
588 | .. _dfa_exec_pcre: | |
546 | 589 | |
547 | 590 | dfa_exec |
548 | 591 | -------- |
587 | 630 | |
588 | 631 | ------------------------------------------------------------ |
589 | 632 | |
633 | .. _maketables_pcre: | |
634 | ||
590 | 635 | maketables |
591 | 636 | ---------- |
592 | 637 | |
624 | 669 | :funcdef:`rex_pcre.version ()` |
625 | 670 | |
626 | 671 | This function returns a string containing the version of the used PCRE library |
672 | and its release date. | |
673 | ||
674 | ------------------------------------------------------------ | |
675 | ||
676 | PCRE2-only functions and methods | |
677 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
678 | ||
679 | new | |
680 | --- | |
681 | ||
682 | :funcdef:`rex.new (patt, [cf], [lo])` | |
683 | ||
684 | The locale (*lo*) can be either a string (e.g., "French_France.1252"), or a | |
685 | userdata obtained from a call to maketables__. The default value, used when | |
686 | the parameter is not supplied or ``nil``, is the built-in PCRE2 set of character | |
687 | tables. | |
688 | ||
689 | __ maketables_pcre2_ | |
690 | ||
691 | ------------------------------------------------------------ | |
692 | ||
693 | patterninfo | |
694 | ----------- | |
695 | ||
696 | [See *pcre2_pattern_info* in the PCRE2 docs.] | |
697 | ||
698 | :funcdef:`r:patterninfo ()` | |
699 | ||
700 | This function returns a table containing information about the compiled pattern. | |
701 | The keys are strings formed in the following way: | |
702 | ``PCRE2_INFO_CAPTURECOUNT`` -> ``"CAPTURECOUNT"``. The values are numbers. | |
703 | ||
704 | ------------------------------------------------------------ | |
705 | ||
706 | dfa_exec | |
707 | -------- | |
708 | ||
709 | [See *pcre2_dfa_match* in the PCRE2 docs.] | |
710 | ||
711 | :funcdef:`r:dfa_exec (subj, [init], [ef], [ovecsize], [wscount])` | |
712 | ||
713 | The method matches a compiled regular expression *r* against a given subject | |
714 | string *subj*, using a DFA matching algorithm. | |
715 | ||
716 | +----------+-------------------------------------+--------+-------------+ | |
717 | |Parameter | Description | Type |Default Value| | |
718 | +==========+=====================================+========+=============+ | |
719 | | r |regex object produced by new |userdata| n/a | | |
720 | +----------+-------------------------------------+--------+-------------+ | |
721 | | subj |subject | string | n/a | | |
722 | +----------+-------------------------------------+--------+-------------+ | |
723 | | [init] |start offset in the subject | number | 1 | | |
724 | | |(can be negative) | | | | |
725 | +----------+-------------------------------------+--------+-------------+ | |
726 | | [ef] |execution flags (bitwise OR) | number | ef_ | | |
727 | +----------+-------------------------------------+--------+-------------+ | |
728 | |[ovecsize]|size of the array for result offsets | number | 100 | | |
729 | +----------+-------------------------------------+--------+-------------+ | |
730 | |[wscount] |number of elements in the working | number | 50 | | |
731 | | |space array | | | | |
732 | +----------+-------------------------------------+--------+-------------+ | |
733 | ||
734 | **Returns on success (either full or partial match):** | |
735 | 1. The start point of the matches found (a number). | |
736 | 2. A table containing the end points of the matches found, the longer matches | |
737 | first. | |
738 | 3. The return value of the underlying *pcre2_dfa_match* call (a number). | |
739 | ||
740 | **Returns on failure (no match):** | |
741 | 1. ``nil`` | |
742 | ||
743 | **Example:** | |
744 | If there are 3 matches found starting at offset 10 and ending at offsets 15, 20 | |
745 | and 25 then the function returns the following: 10, { 25,20,15 }, 3. | |
746 | ||
747 | ------------------------------------------------------------ | |
748 | ||
749 | jit_compile | |
750 | ----------- | |
751 | ||
752 | [See *pcre2_jit_compile* in the PCRE2 docs.] | |
753 | ||
754 | :funcdef:`r:jit_compile ([options])` | |
755 | ||
756 | Parameter *options* is a number (a bitwise OR of separate options; | |
757 | it defaults to ``PCRE2_JIT_COMPLETE``). | |
758 | ||
759 | The method returns ``true`` on success or ``false`` + error message string on failure. | |
760 | ||
761 | ------------------------------------------------------------ | |
762 | ||
763 | .. _maketables_pcre2: | |
764 | ||
765 | maketables | |
766 | ---------- | |
767 | ||
768 | [See *pcre2_maketables* in the PCRE2 docs.] | |
769 | ||
770 | :funcdef:`rex_pcre2.maketables ()` | |
771 | ||
772 | Creates a set of character tables corresponding to the current locale and | |
773 | returns it as a userdata. The returned value can be passed to any Lrexlib | |
774 | function accepting the *locale* parameter. | |
775 | ||
776 | ------------------------------------------------------------ | |
777 | ||
778 | config | |
779 | ------ | |
780 | ||
781 | [See *pcre2_config* in the PCRE2 docs.] | |
782 | ||
783 | :funcdef:`rex_pcre2.config ([tb])` | |
784 | ||
785 | This function returns a table containing the values of the configuration | |
786 | parameters used at PCRE2 library build-time. Those parameters (numbers) are | |
787 | keyed by their names (strings). If the table argument *tb* is supplied then it | |
788 | is used as the output table, else a new table is created. | |
789 | ||
790 | ------------------------------------------------------------ | |
791 | ||
792 | version | |
793 | ------- | |
794 | ||
795 | [See *pcre2_config(PCRE2_CONFIG_VERSION)* in the PCRE2 docs.] | |
796 | ||
797 | :funcdef:`rex_pcre2.version ()` | |
798 | ||
799 | This function returns a string containing the version of the used PCRE2 library | |
627 | 800 | and its release date. |
628 | 801 | |
629 | 802 | ------------------------------------------------------------ |
701 | 874 | |
702 | 875 | This function returns a string containing the version of the used Oniguruma |
703 | 876 | library. |
877 | ||
878 | ------------------------------------------------------------ | |
879 | ||
880 | capturecount | |
881 | ------------ | |
882 | ||
883 | [See *onig_number_of_captures* in the Oniguruma docs.] | |
884 | ||
885 | :funcdef:`r:capturecount ()` | |
886 | ||
887 | Returns the number of captures in the pattern. | |
704 | 888 | |
705 | 889 | ------------------------------------------------------------ |
706 | 890 | |
847 | 1031 | Incompatibilities with previous versions |
848 | 1032 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
849 | 1033 | |
1034 | **Incompatibilities between versions 2.8 and 2.7:** | |
1035 | ||
1036 | 1. In the functions searching for multiple matches every empty match adjacent | |
1037 | to the previous match is discarded. | |
1038 | ||
850 | 1039 | **Incompatibilities between versions 2.6 and 2.5:** |
851 | 1040 | |
852 | 1041 | 1. Removed function ``plainfind``. |
863 | 1052 | |
864 | 1053 | **Incompatibilities between versions 2.1 and 2.0:** |
865 | 1054 | |
866 | 1. match_, find_, tfind_, exec_, dfa_exec_: only one value (a ``nil``) is | |
1055 | 1. match_, find_, tfind_, exec_, dfa_exec__: only one value (a ``nil``) is | |
867 | 1056 | returned when the subject does not match the pattern. Any other failure |
868 | 1057 | generates an error. |
1058 | ||
1059 | __ dfa_exec_pcre_ | |
869 | 1060 | |
870 | 1061 | **Incompatibilities between versions 2.0 and 1.19:** |
871 | 1062 |
0 | -- Generate the rockspecs | |
0 | -- Generate rockspecs from a prototype with variants | |
1 | 1 | |
2 | require "std" | |
2 | local tree = require "std.tree" | |
3 | 3 | |
4 | if select ("#", ...) < 1 then | |
5 | io.stderr:write "Usage: mkrockspecs VERSION\n" | |
4 | if select ("#", ...) < 2 then | |
5 | io.stderr:write "Usage: mkrockspecs PACKAGE VERSION\n" | |
6 | 6 | os.exit () |
7 | 7 | end |
8 | 8 | |
9 | version = select (1, ...) | |
9 | package_name = select (1, ...) | |
10 | version = select (2, ...) | |
10 | 11 | |
11 | 12 | function format (x, indent) |
12 | 13 | indent = indent or "" |
20 | 21 | for i, v in ipairs (x) do |
21 | 22 | s = s..indent..format (v, indent.." ")..",\n" |
22 | 23 | end |
23 | return s..indent:sub(1, -3).."}" | |
24 | return s..indent:sub (1, -3).."}" | |
24 | 25 | elseif type (x) == "string" then |
25 | 26 | return string.format ("%q", x) |
26 | 27 | else |
28 | 29 | end |
29 | 30 | end |
30 | 31 | |
32 | flavour = "" -- a global, visible in loadfile | |
31 | 33 | for f, spec in pairs (loadfile ("rockspecs.lua") ()) do |
32 | 34 | if f ~= "default" then |
33 | local specfile = "lrexlib-"..f:lower ().."-"..version.."-1.rockspec" | |
35 | local specfile = package_name.."-"..(f ~= "" and f:lower ().."-" or "")..version.."-1.rockspec" | |
34 | 36 | h = io.open (specfile, "w") |
35 | 37 | assert (h) |
36 | flavour = f -- a global, visible in loadfile | |
38 | flavour = f | |
37 | 39 | local specs = loadfile ("rockspecs.lua") () -- reload to get current flavour interpolated |
38 | local spec = table.merge (specs.default, specs[f]) | |
40 | local spec = tree.merge (tree (specs.default), tree (specs[f])) | |
39 | 41 | local s = "" |
40 | 42 | for i, v in pairs (spec) do |
41 | 43 | s = s..i.." = "..format (v, " ").."\n" |
4 | 4 | -- flavour: regex library |
5 | 5 | -- version |
6 | 6 | |
7 | local flavours = {"PCRE", "POSIX", "oniguruma", "TRE", "GNU"} | |
7 | local flavours = {"PCRE", "PCRE2", "POSIX", "oniguruma", "TRE", "GNU"} | |
8 | 8 | local version_dashed = version:gsub ("%.", "-") |
9 | -- FIXME: PCRE2 define should be only in PCRE2 rockspec | |
10 | local defines = {"VERSION=\""..version.."\"", | |
11 | "PCRE2_CODE_UNIT_WIDTH=8"} | |
9 | 12 | |
10 | 13 | -- FIXME: When Lua 5.1 support is dropped, use an env argument with |
11 | 14 | -- loadfile instead of wrapping in a table |
21 | 24 | description = { |
22 | 25 | summary = "Regular expression library binding ("..flavour.." flavour).", |
23 | 26 | detailed = [[ |
24 | Lrexlib is a regular expression library for Lua 5.1 and 5.2, which | |
27 | Lrexlib is a regular expression library for Lua 5.1-5.4, which | |
25 | 28 | provides bindings for several regular expression libraries. |
26 | 29 | This rock provides the ]]..flavour..[[ bindings.]], |
27 | 30 | homepage = "http://github.com/rrthomas/lrexlib", |
43 | 46 | type = "builtin", |
44 | 47 | modules = { |
45 | 48 | rex_pcre = { |
46 | defines = {"VERSION=\""..version.."\""}, | |
49 | defines = defines, | |
47 | 50 | sources = {"src/common.c", "src/pcre/lpcre.c", "src/pcre/lpcre_f.c"}, |
48 | 51 | libraries = {"pcre"}, |
49 | 52 | incdirs = {"$(PCRE_INCDIR)"}, |
50 | 53 | libdirs = {"$(PCRE_LIBDIR)"} |
54 | } | |
55 | } | |
56 | } | |
57 | }, | |
58 | ||
59 | PCRE2 = { | |
60 | external_dependencies = { | |
61 | PCRE2 = { | |
62 | header = "pcre2.h", | |
63 | library = "pcre2-8" | |
64 | } | |
65 | }, | |
66 | build = { | |
67 | type = "builtin", | |
68 | modules = { | |
69 | rex_pcre2 = { | |
70 | defines = defines, | |
71 | sources = {"src/common.c", "src/pcre2/lpcre2.c", "src/pcre2/lpcre2_f.c"}, | |
72 | libraries = {"pcre2-8"}, | |
73 | incdirs = {"$(PCRE2_INCDIR)"}, | |
74 | libdirs = {"$(PCRE2_LIBDIR)"} | |
51 | 75 | } |
52 | 76 | } |
53 | 77 | } |
63 | 87 | type = "builtin", |
64 | 88 | modules = { |
65 | 89 | rex_posix = { |
66 | defines = {"VERSION=\""..version.."\""}, | |
90 | defines = defines, | |
67 | 91 | sources = {"src/common.c", "src/posix/lposix.c"} |
68 | 92 | } |
69 | 93 | } |
81 | 105 | type = "builtin", |
82 | 106 | modules = { |
83 | 107 | rex_onig = { |
84 | defines = {"VERSION=\""..version.."\""}, | |
108 | defines = defines, | |
85 | 109 | sources = {"src/common.c", "src/oniguruma/lonig.c", "src/oniguruma/lonig_f.c"}, |
86 | 110 | libraries = {"onig"}, |
87 | 111 | incdirs = {"$(ONIG_INCDIR)"}, |
102 | 126 | type = "builtin", |
103 | 127 | modules = { |
104 | 128 | rex_tre = { |
105 | defines = {"VERSION=\""..version.."\""}, | |
129 | defines = defines, | |
106 | 130 | sources = {"src/common.c", "src/tre/ltre.c" --[[, "src/tre/tre_w.c"]]}, |
107 | 131 | libraries = {"tre"}, |
108 | 132 | incdirs = {"$(TRE_INCDIR)"}, |
122 | 146 | type = "builtin", |
123 | 147 | modules = { |
124 | 148 | rex_gnu = { |
125 | defines = {"VERSION=\""..version.."\""}, | |
149 | defines = defines, | |
126 | 150 | sources = {"src/common.c", "src/gnu/lgnu.c"} |
127 | 151 | } |
128 | 152 | } |
8 | 8 | static void gmatch_pushsubject (lua_State *L, TArgExec *argE); |
9 | 9 | static int findmatch_exec (TUserdata *ud, TArgExec *argE); |
10 | 10 | static int split_exec (TUserdata *ud, TArgExec *argE, int offset); |
11 | static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset); | |
12 | static int gmatch_exec (TUserdata *ud, TArgExec *argE); | |
11 | 13 | static int compile_regex (lua_State *L, const TArgComp *argC, TUserdata **pud); |
12 | 14 | static int generate_error (lua_State *L, const TUserdata *ud, int errcode); |
13 | 15 | |
32 | 34 | #ifndef DO_NAMED_SUBPATTERNS |
33 | 35 | #define DO_NAMED_SUBPATTERNS(a,b,c) |
34 | 36 | #endif |
35 | ||
36 | /* When doing an iterative search, there can occur a situation of a zero-length | |
37 | * match at the current position, that prevents further advance on the subject | |
38 | * string. | |
39 | * There are two ways to handle that (AFAIK): | |
40 | * a) Advance by one character (continue the search from the next position), | |
41 | * or | |
42 | * b) Search for a non-zero-length match that begins from the current | |
43 | * position ("retry" the search). If the match is not found then advance | |
44 | * by one character. | |
45 | * The "b)" seems more correct, but most regex libraries expose no API for that. | |
46 | * The known exception is PCRE that has flags PCRE_NOTEMPTY and PCRE_ANCHORED. | |
47 | */ | |
48 | #ifdef ALG_USERETRY | |
49 | #define SET_RETRY(a,b) (a=b) | |
50 | static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset, int retry); | |
51 | static int gmatch_exec (TUserdata *ud, TArgExec *argE, int retry); | |
52 | #define GSUB_EXEC gsub_exec | |
53 | #define GMATCH_EXEC gmatch_exec | |
54 | #else | |
55 | #define SET_RETRY(a,b) ((void)a) | |
56 | static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset); | |
57 | static int gmatch_exec (TUserdata *ud, TArgExec *argE); | |
58 | #define GSUB_EXEC(a,b,c,d) gsub_exec(a,b,c) | |
59 | #define GMATCH_EXEC(a,b,c) gmatch_exec(a,b) | |
60 | #endif | |
61 | ||
62 | 37 | |
63 | 38 | #define METHOD_FIND 0 |
64 | 39 | #define METHOD_MATCH 1 |
80 | 55 | |
81 | 56 | |
82 | 57 | static int get_startoffset(lua_State *L, int stackpos, size_t len) { |
83 | int startoffset = luaL_optint(L, stackpos, 1); | |
58 | int startoffset = (int)luaL_optinteger(L, stackpos, 1); | |
84 | 59 | if(startoffset > 0) |
85 | 60 | startoffset--; |
86 | 61 | else if(startoffset < 0) { |
131 | 106 | if (type != LUA_TLIGHTUSERDATA) |
132 | 107 | luaL_error (L, "subject's topointer method returned %s (expected lightuserdata)", |
133 | 108 | lua_typename (L, type)); |
134 | argE->text = lua_touserdata (L, -1); | |
109 | argE->text = (const char*) lua_touserdata (L, -1); | |
135 | 110 | lua_pop (L, 1); |
136 | 111 | #if LUA_VERSION_NUM == 501 |
137 | lua_objlen (L, pos); | |
112 | if (luaL_callmeta (L, pos, "__len")) { | |
113 | if (lua_type (L, -1) != LUA_TNUMBER) | |
114 | luaL_argerror (L, pos, "subject's length is not a number"); | |
115 | argE->textlen = lua_tointeger (L, -1); | |
116 | lua_pop (L, 1); | |
117 | } | |
118 | else | |
119 | argE->textlen = lua_objlen (L, pos); | |
138 | 120 | #else |
139 | lua_len (L, pos); | |
140 | #endif | |
141 | type = lua_type (L, -1); | |
142 | if (type != LUA_TNUMBER) | |
143 | luaL_error (L, "subject's length is %s (expected number)", | |
144 | lua_typename (L, type)); | |
145 | argE->textlen = lua_tointeger (L, -1); | |
146 | lua_pop (L, 1); | |
121 | argE->textlen = luaL_len (L, pos); | |
122 | #endif | |
147 | 123 | } |
148 | 124 | } |
149 | 125 | |
154 | 130 | argC->ud = NULL; |
155 | 131 | } |
156 | 132 | else if ((argC->ud = test_ud (L, pos)) == NULL) |
157 | luaL_typerror(L, pos, "string or "REX_TYPENAME); | |
133 | luaL_typerror(L, pos, "string or " REX_TYPENAME); | |
158 | 134 | } |
159 | 135 | |
160 | 136 | static void checkarg_new (lua_State *L, TArgComp *argC) { |
171 | 147 | lua_tostring (L, 3); /* converts number (if any) to string */ |
172 | 148 | argE->reptype = lua_type (L, 3); |
173 | 149 | if (argE->reptype != LUA_TSTRING && argE->reptype != LUA_TTABLE && |
174 | argE->reptype != LUA_TFUNCTION && argE->reptype != LUA_TNIL && | |
175 | (argE->reptype != LUA_TBOOLEAN || | |
176 | (argE->reptype == LUA_TBOOLEAN && lua_toboolean (L, 3)))) { | |
177 | luaL_typerror (L, 3, "string, table, function, false or nil"); | |
150 | argE->reptype != LUA_TFUNCTION) { | |
151 | luaL_typerror (L, 3, "string, table or function"); | |
178 | 152 | } |
179 | 153 | argE->funcpos = 3; |
180 | 154 | argE->funcpos2 = 4; |
181 | 155 | argE->maxmatch = OptLimit (L, 4); |
182 | 156 | argC->cflags = ALG_GETCFLAGS (L, 5); |
183 | argE->eflags = luaL_optint (L, 6, ALG_EFLAGS_DFLT); | |
157 | argE->eflags = (int)luaL_optinteger (L, 6, ALG_EFLAGS_DFLT); | |
184 | 158 | ALG_GETCARGS (L, 7, argC); |
159 | } | |
160 | ||
161 | ||
162 | /* function count (s, patt, [cf], [ef], [larg...]) */ | |
163 | static void checkarg_count (lua_State *L, TArgComp *argC, TArgExec *argE) { | |
164 | check_subject (L, 1, argE); | |
165 | check_pattern (L, 2, argC); | |
166 | argC->cflags = ALG_GETCFLAGS (L, 3); | |
167 | argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT); | |
168 | ALG_GETCARGS (L, 5, argC); | |
185 | 169 | } |
186 | 170 | |
187 | 171 | |
192 | 176 | check_pattern (L, 2, argC); |
193 | 177 | argE->startoffset = get_startoffset (L, 3, argE->textlen); |
194 | 178 | argC->cflags = ALG_GETCFLAGS (L, 4); |
195 | argE->eflags = luaL_optint (L, 5, ALG_EFLAGS_DFLT); | |
179 | argE->eflags = (int)luaL_optinteger (L, 5, ALG_EFLAGS_DFLT); | |
196 | 180 | ALG_GETCARGS (L, 6, argC); |
197 | 181 | } |
198 | 182 | |
203 | 187 | check_subject (L, 1, argE); |
204 | 188 | check_pattern (L, 2, argC); |
205 | 189 | argC->cflags = ALG_GETCFLAGS (L, 3); |
206 | argE->eflags = luaL_optint (L, 4, ALG_EFLAGS_DFLT); | |
190 | argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT); | |
207 | 191 | ALG_GETCARGS (L, 5, argC); |
208 | 192 | } |
209 | 193 | |
216 | 200 | *ud = check_ud (L); |
217 | 201 | check_subject (L, 2, argE); |
218 | 202 | argE->startoffset = get_startoffset (L, 3, argE->textlen); |
219 | argE->eflags = luaL_optint (L, 4, ALG_EFLAGS_DFLT); | |
203 | argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT); | |
220 | 204 | } |
221 | 205 | |
222 | 206 | |
243 | 227 | TUserdata *ud; |
244 | 228 | TArgComp argC; |
245 | 229 | TArgExec argE; |
246 | int n_match = 0, n_subst = 0, st = 0, retry; | |
230 | int n_match = 0, n_subst = 0, st = 0, last_to = -1; | |
247 | 231 | TBuffer BufOut, BufRep, BufTemp, *pBuf = &BufOut; |
248 | 232 | TFreeList freelist; |
249 | 233 | /*------------------------------------------------------------------*/ |
266 | 250 | } |
267 | 251 | /*------------------------------------------------------------------*/ |
268 | 252 | buffer_init (&BufOut, 1024, L, &freelist); |
269 | SET_RETRY (retry, 0); | |
270 | 253 | while ((argE.maxmatch < 0 || n_match < argE.maxmatch) && st <= (int)argE.textlen) { |
271 | 254 | int from, to, res; |
272 | 255 | int curr_subst = 0; |
273 | res = GSUB_EXEC (ud, &argE, st, retry); | |
256 | res = gsub_exec (ud, &argE, st); | |
274 | 257 | if (ALG_NOMATCH (res)) { |
275 | #ifdef ALG_USERETRY | |
276 | if (retry) { | |
277 | if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */ | |
278 | buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE); | |
279 | st += ALG_CHARSIZE; | |
280 | retry = 0; | |
281 | continue; | |
282 | } | |
283 | } | |
284 | #endif | |
285 | 258 | break; |
286 | 259 | } |
287 | 260 | else if (!ALG_ISMATCH (res)) { |
288 | 261 | freelist_free (&freelist); |
289 | 262 | return generate_error (L, ud, res); |
290 | 263 | } |
291 | ++n_match; | |
292 | 264 | from = ALG_BASE(st) + ALG_SUBBEG(ud,0); |
293 | 265 | to = ALG_BASE(st) + ALG_SUBEND(ud,0); |
266 | if (to == last_to) { /* discard an empty match adjacent to the previous match */ | |
267 | if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */ | |
268 | buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE); | |
269 | st += ALG_CHARSIZE; | |
270 | continue; | |
271 | } | |
272 | break; | |
273 | } | |
274 | last_to = to; | |
275 | ++n_match; | |
294 | 276 | if (st < from) { |
295 | 277 | buffer_addlstring (&BufOut, argE.text + st, from - st); |
296 | 278 | #ifdef ALG_PULL |
333 | 315 | freelist_free (&freelist); |
334 | 316 | return lua_error (L); /* re-raise the error */ |
335 | 317 | } |
336 | } | |
337 | /*----------------------------------------------------------------*/ | |
338 | else if (argE.reptype == LUA_TNIL || argE.reptype == LUA_TBOOLEAN) { | |
339 | buffer_addlstring (pBuf, argE.text + from, to - from); | |
340 | 318 | } |
341 | 319 | /*----------------------------------------------------------------*/ |
342 | 320 | if (argE.reptype == LUA_TTABLE || argE.reptype == LUA_TFUNCTION) { |
400 | 378 | n_subst += curr_subst; |
401 | 379 | if (st < to) { |
402 | 380 | st = to; |
403 | SET_RETRY (retry, 0); | |
404 | 381 | } |
405 | 382 | else if (st < (int)argE.textlen) { |
406 | #ifdef ALG_USERETRY | |
407 | retry = 1; | |
408 | #else | |
409 | 383 | /* advance by 1 char (not replaced) */ |
410 | 384 | buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE); |
411 | 385 | st += ALG_CHARSIZE; |
412 | #endif | |
413 | 386 | } |
414 | 387 | else break; |
415 | 388 | } |
420 | 393 | lua_pushinteger (L, n_subst); |
421 | 394 | freelist_free (&freelist); |
422 | 395 | return 3; |
396 | } | |
397 | ||
398 | ||
399 | static int algf_count (lua_State *L) { | |
400 | TUserdata *ud; | |
401 | TArgComp argC; | |
402 | TArgExec argE; | |
403 | int n_match = 0, st = 0, last_to = -1; | |
404 | /*------------------------------------------------------------------*/ | |
405 | checkarg_count (L, &argC, &argE); | |
406 | if (argC.ud) { | |
407 | ud = (TUserdata*) argC.ud; | |
408 | lua_pushvalue (L, 2); | |
409 | } | |
410 | else compile_regex (L, &argC, &ud); | |
411 | /*------------------------------------------------------------------*/ | |
412 | while (st <= (int)argE.textlen) { | |
413 | int to, res; | |
414 | res = gsub_exec (ud, &argE, st); | |
415 | if (ALG_NOMATCH (res)) { | |
416 | break; | |
417 | } | |
418 | else if (!ALG_ISMATCH (res)) { | |
419 | return generate_error (L, ud, res); | |
420 | } | |
421 | to = ALG_BASE(st) + ALG_SUBEND(ud,0); | |
422 | if (to == last_to) { /* discard an empty match adjacent to the previous match */ | |
423 | if (st < (int)argE.textlen) { /* advance by 1 char */ | |
424 | st += ALG_CHARSIZE; | |
425 | continue; | |
426 | } | |
427 | break; | |
428 | } | |
429 | last_to = to; | |
430 | ++n_match; | |
431 | #ifdef ALG_PULL | |
432 | { | |
433 | int from = ALG_BASE(st) + ALG_SUBBEG(ud,0); | |
434 | if (st < from) | |
435 | st = from; | |
436 | } | |
437 | #endif | |
438 | /*----------------------------------------------------------------*/ | |
439 | if (st < to) { | |
440 | st = to; | |
441 | } | |
442 | else if (st < (int)argE.textlen) { | |
443 | /* advance by 1 char (not replaced) */ | |
444 | st += ALG_CHARSIZE; | |
445 | } | |
446 | else break; | |
447 | } | |
448 | /*------------------------------------------------------------------*/ | |
449 | lua_pushinteger (L, n_match); | |
450 | return 1; | |
423 | 451 | } |
424 | 452 | |
425 | 453 | |
475 | 503 | |
476 | 504 | |
477 | 505 | static int gmatch_iter (lua_State *L) { |
478 | int retry; | |
506 | int last_end, res; | |
479 | 507 | TArgExec argE; |
480 | 508 | TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1)); |
481 | 509 | argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen); |
482 | 510 | argE.eflags = lua_tointeger (L, lua_upvalueindex (3)); |
483 | 511 | argE.startoffset = lua_tointeger (L, lua_upvalueindex (4)); |
484 | #ifdef ALG_USERETRY | |
485 | retry = lua_tointeger (L, lua_upvalueindex (5)); | |
486 | #endif | |
487 | ||
488 | if (argE.startoffset > (int)argE.textlen) | |
489 | return 0; | |
512 | last_end = lua_tointeger (L, lua_upvalueindex (5)); | |
490 | 513 | |
491 | 514 | while (1) { |
492 | int res = GMATCH_EXEC (ud, &argE, retry); | |
515 | if (argE.startoffset > (int)argE.textlen) | |
516 | return 0; | |
517 | res = gmatch_exec (ud, &argE); | |
493 | 518 | if (ALG_ISMATCH (res)) { |
494 | 519 | int incr = 0; |
495 | if (ALG_SUBLEN(ud,0)) { | |
496 | SET_RETRY (retry, 0); | |
497 | } | |
498 | else { /* no progress: prevent endless loop */ | |
499 | #ifdef ALG_USERETRY | |
500 | SET_RETRY (retry, 1); | |
501 | #else | |
520 | if (!ALG_SUBLEN(ud,0)) { /* no progress: prevent endless loop */ | |
521 | if (last_end == ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0)) { | |
522 | argE.startoffset += ALG_CHARSIZE; | |
523 | continue; | |
524 | } | |
502 | 525 | incr = ALG_CHARSIZE; |
503 | #endif | |
504 | } | |
505 | lua_pushinteger(L, ALG_BASE(argE.startoffset) + incr + ALG_SUBEND(ud,0)); /* update start offset */ | |
526 | } | |
527 | last_end = ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0); | |
528 | lua_pushinteger(L, last_end + incr); /* update start offset */ | |
506 | 529 | lua_replace (L, lua_upvalueindex (4)); |
507 | #ifdef ALG_USERETRY | |
508 | lua_pushinteger (L, retry); | |
509 | lua_replace (L, lua_upvalueindex (5)); /* update retry */ | |
510 | #endif | |
530 | lua_pushinteger(L, last_end); /* update last end of match */ | |
531 | lua_replace (L, lua_upvalueindex (5)); | |
511 | 532 | /* push either captures or entire match */ |
512 | 533 | if (ALG_NSUB(ud)) { |
513 | 534 | push_substrings (L, ud, argE.text, NULL); |
518 | 539 | return 1; |
519 | 540 | } |
520 | 541 | } |
521 | else if (ALG_NOMATCH (res)) { | |
522 | #ifdef ALG_USERETRY | |
523 | if (retry) { | |
524 | if (argE.startoffset < (int)argE.textlen) { | |
525 | ++argE.startoffset; /* advance by 1 char */ | |
526 | SET_RETRY (retry, 0); | |
527 | continue; | |
528 | } | |
529 | } | |
530 | #endif | |
542 | else if (ALG_NOMATCH (res)) | |
531 | 543 | return 0; |
532 | } | |
533 | 544 | else |
534 | 545 | return generate_error (L, ud, res); |
535 | 546 | } |
537 | 548 | |
538 | 549 | |
539 | 550 | static int split_iter (lua_State *L) { |
540 | int incr, newoffset, res; | |
551 | int incr, last_end, newoffset, res; | |
541 | 552 | TArgExec argE; |
542 | 553 | TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1)); |
543 | 554 | argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen); |
544 | 555 | argE.eflags = lua_tointeger (L, lua_upvalueindex (3)); |
545 | 556 | argE.startoffset = lua_tointeger (L, lua_upvalueindex (4)); |
546 | 557 | incr = lua_tointeger (L, lua_upvalueindex (5)); |
547 | ||
548 | if (argE.startoffset > (int)argE.textlen) | |
558 | last_end = lua_tointeger (L, lua_upvalueindex (6)); | |
559 | ||
560 | if (incr < 0) | |
549 | 561 | return 0; |
550 | 562 | |
551 | if ((newoffset = argE.startoffset + incr) > (int)argE.textlen) | |
552 | goto nomatch; | |
553 | ||
554 | res = split_exec (ud, &argE, newoffset); | |
555 | if (ALG_ISMATCH (res)) { | |
556 | lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset */ | |
557 | lua_replace (L, lua_upvalueindex (4)); | |
558 | lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : ALG_CHARSIZE); /* update incr */ | |
559 | lua_replace (L, lua_upvalueindex (5)); | |
560 | /* push text preceding the match */ | |
561 | lua_pushlstring (L, argE.text + argE.startoffset, | |
562 | ALG_SUBBEG(ud,0) + ALG_BASE(newoffset) - argE.startoffset); | |
563 | /* push either captures or entire match */ | |
564 | if (ALG_NSUB(ud)) { | |
565 | push_substrings (L, ud, argE.text + ALG_BASE(newoffset), NULL); | |
566 | return 1 + ALG_NSUB(ud); | |
567 | } | |
568 | else { | |
569 | ALG_PUSHSUB (L, ud, argE.text + ALG_BASE(newoffset), 0); | |
570 | return 2; | |
571 | } | |
572 | } | |
573 | else if (ALG_NOMATCH (res)) | |
574 | goto nomatch; | |
575 | else | |
576 | return generate_error (L, ud, res); | |
577 | ||
578 | nomatch: | |
579 | lua_pushinteger (L, argE.textlen + 1); /* mark as last iteration */ | |
580 | lua_replace (L, lua_upvalueindex (4)); /* update start offset */ | |
563 | while (1) { | |
564 | if ((newoffset = argE.startoffset + incr) > (int)argE.textlen) | |
565 | break; | |
566 | res = split_exec (ud, &argE, newoffset); | |
567 | if (ALG_ISMATCH (res)) { | |
568 | if (!ALG_SUBLEN(ud,0)) { /* no progress: prevent endless loop */ | |
569 | if (last_end == ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0)) { | |
570 | incr += ALG_CHARSIZE; | |
571 | continue; | |
572 | } | |
573 | } | |
574 | lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset and last_end */ | |
575 | lua_pushvalue (L, -1); | |
576 | lua_replace (L, lua_upvalueindex (4)); | |
577 | lua_replace (L, lua_upvalueindex (6)); | |
578 | lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : ALG_CHARSIZE); /* update incr */ | |
579 | lua_replace (L, lua_upvalueindex (5)); | |
580 | /* push text preceding the match */ | |
581 | lua_pushlstring (L, argE.text + argE.startoffset, | |
582 | ALG_SUBBEG(ud,0) + ALG_BASE(newoffset) - argE.startoffset); | |
583 | /* push either captures or entire match */ | |
584 | if (ALG_NSUB(ud)) { | |
585 | push_substrings (L, ud, argE.text + ALG_BASE(newoffset), NULL); | |
586 | return 1 + ALG_NSUB(ud); | |
587 | } | |
588 | else { | |
589 | ALG_PUSHSUB (L, ud, argE.text + ALG_BASE(newoffset), 0); | |
590 | return 2; | |
591 | } | |
592 | } | |
593 | else if (ALG_NOMATCH (res)) | |
594 | break; | |
595 | else | |
596 | return generate_error (L, ud, res); | |
597 | } | |
598 | lua_pushinteger (L, -1); /* mark as last iteration */ | |
599 | lua_replace (L, lua_upvalueindex (5)); /* incr = -1 */ | |
581 | 600 | lua_pushlstring (L, argE.text+argE.startoffset, argE.textlen-argE.startoffset); |
582 | 601 | return 1; |
583 | 602 | } |
587 | 606 | { |
588 | 607 | TArgComp argC; |
589 | 608 | TArgExec argE; |
590 | TUserdata *ud; | |
591 | 609 | checkarg_gmatch_split (L, &argC, &argE); |
592 | if (argC.ud) { | |
593 | ud = (TUserdata*) argC.ud; | |
610 | if (argC.ud) | |
594 | 611 | lua_pushvalue (L, 2); |
595 | } | |
596 | else compile_regex (L, &argC, &ud); /* 1-st upvalue: ud */ | |
612 | else | |
613 | compile_regex (L, &argC, NULL); /* 1-st upvalue: ud */ | |
597 | 614 | gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */ |
598 | 615 | lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */ |
599 | 616 | lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */ |
600 | #ifdef ALG_USERETRY | |
601 | lua_pushinteger (L, 0); /* 5-th upvalue: retry */ | |
617 | lua_pushinteger (L, -1); /* 5-th upvalue: last end of match */ | |
602 | 618 | lua_pushcclosure (L, gmatch_iter, 5); |
603 | #else | |
604 | lua_pushcclosure (L, gmatch_iter, 4); | |
605 | #endif | |
606 | 619 | return 1; |
607 | 620 | } |
608 | 621 | |
610 | 623 | { |
611 | 624 | TArgComp argC; |
612 | 625 | TArgExec argE; |
613 | TUserdata *ud; | |
614 | 626 | checkarg_gmatch_split (L, &argC, &argE); |
615 | if (argC.ud) { | |
616 | ud = (TUserdata*) argC.ud; | |
627 | if (argC.ud) | |
617 | 628 | lua_pushvalue (L, 2); |
618 | } | |
619 | else compile_regex (L, &argC, &ud); /* 1-st upvalue: ud */ | |
629 | else | |
630 | compile_regex (L, &argC, NULL); /* 1-st upvalue: ud */ | |
620 | 631 | gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */ |
621 | 632 | lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */ |
622 | 633 | lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */ |
623 | 634 | lua_pushinteger (L, 0); /* 5-th upvalue: incr */ |
624 | lua_pushcclosure (L, split_iter, 5); | |
635 | lua_pushinteger (L, -1); /* 6-th upvalue: last_end */ | |
636 | lua_pushcclosure (L, split_iter, 6); | |
625 | 637 | return 1; |
626 | 638 | } |
627 | 639 | |
734 | 746 | #endif |
735 | 747 | lua_pushfstring (L, REX_VERSION" (for %s)", name); |
736 | 748 | lua_setfield (L, -2, "_VERSION"); |
737 | } | |
749 | #ifndef REX_NOEMBEDDEDTEST | |
750 | lua_pushcfunction (L, newmembuffer); | |
751 | lua_setfield (L, -2, "_newmembuffer"); | |
752 | #endif | |
753 | } |
125 | 125 | enum { ID_NUMBER, ID_STRING }; |
126 | 126 | |
127 | 127 | void buffer_init (TBuffer *buf, size_t sz, lua_State *L, TFreeList *fl) { |
128 | buf->arr = Lmalloc(L, sz); | |
128 | buf->arr = (char*) Lmalloc(L, sz); | |
129 | 129 | if (!buf->arr) { |
130 | 130 | freelist_free (fl); |
131 | 131 | luaL_error (L, "malloc failed"); |
263 | 263 | return luaL_argerror(L, narg, msg); |
264 | 264 | } |
265 | 265 | #endif |
266 | ||
267 | #ifndef REX_NOEMBEDDEDTEST | |
268 | static int ud_topointer (lua_State *L) { | |
269 | lua_pushlightuserdata (L, lua_touserdata (L, 1)); | |
270 | return 1; | |
271 | } | |
272 | ||
273 | static int ud_len (lua_State *L) { | |
274 | lua_pushinteger (L, lua_objlen (L, 1)); | |
275 | return 1; | |
276 | } | |
277 | ||
278 | /* for testing purposes only */ | |
279 | int newmembuffer (lua_State *L) { | |
280 | size_t len; | |
281 | const char* s = luaL_checklstring (L, 1, &len); | |
282 | void *ud = lua_newuserdata (L, len); | |
283 | memcpy (ud, s, len); | |
284 | lua_newtable (L); /* metatable */ | |
285 | lua_pushvalue (L, -1); | |
286 | lua_setfield (L, -2, "__index"); /* metatable.__index = metatable */ | |
287 | lua_pushcfunction (L, ud_topointer); | |
288 | lua_setfield (L, -2, "topointer"); | |
289 | lua_pushcfunction (L, ud_len); | |
290 | lua_setfield (L, -2, "__len"); | |
291 | lua_setmetatable (L, -2); | |
292 | return 1; | |
293 | } | |
294 | #endif /* #ifndef REX_NOEMBEDDEDTEST */ |
6 | 6 | #include "lua.h" |
7 | 7 | |
8 | 8 | #if LUA_VERSION_NUM > 501 |
9 | # define lua_objlen lua_rawlen | |
9 | 10 | int luaL_typerror (lua_State *L, int narg, const char *tname); |
10 | 11 | #endif |
11 | 12 | |
94 | 95 | void *Lrealloc (lua_State *L, void *p, size_t osize, size_t nsize); |
95 | 96 | void Lfree (lua_State *L, void *p, size_t size); |
96 | 97 | |
98 | #ifndef REX_NOEMBEDDEDTEST | |
99 | int newmembuffer (lua_State *L); | |
97 | 100 | #endif |
101 | ||
102 | #endif |
32 | 32 | #define ALG_CFLAGS_DFLT RE_SYNTAX_POSIX_EXTENDED |
33 | 33 | #define ALG_EFLAGS_DFLT 0 |
34 | 34 | |
35 | #define ALG_GETCFLAGS(L,pos) luaL_optint(L, pos, ALG_CFLAGS_DFLT) | |
35 | #define ALG_GETCFLAGS(L,pos) (int)luaL_optinteger(L, pos, ALG_CFLAGS_DFLT) | |
36 | 36 | |
37 | 37 | static const unsigned char *gettranslate (lua_State *L, int pos); |
38 | 38 | #define ALG_GETCARGS(L,pos,argC) argC->translate = gettranslate (L, pos) |
49 | 49 | lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n)) |
50 | 50 | |
51 | 51 | #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ |
52 | (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
52 | (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
53 | 53 | |
54 | 54 | #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1) |
55 | 55 | #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n)) |
298 | 298 | { "find", algf_find }, |
299 | 299 | { "gmatch", algf_gmatch }, |
300 | 300 | { "gsub", algf_gsub }, |
301 | { "count", algf_count }, | |
301 | 302 | { "split", algf_split }, |
302 | 303 | { "new", algf_new }, |
303 | 304 | { "flags", Gnu_get_flags }, |
54 | 54 | |
55 | 55 | #define ALG_BASE(st) 0 |
56 | 56 | #define ALG_PULL |
57 | /* #define ALG_USERETRY */ | |
58 | 57 | |
59 | 58 | typedef struct { |
60 | 59 | regex_t *reg; |
102 | 101 | |
103 | 102 | static int generate_error (lua_State *L, const TOnig *ud, int errcode) { |
104 | 103 | char buf [ONIG_MAX_ERROR_MESSAGE_LEN]; |
105 | onig_error_code_to_str(buf, errcode, ud->einfo); | |
104 | onig_error_code_to_str((unsigned char*) buf, errcode, &ud->einfo); | |
106 | 105 | return luaL_error(L, buf); |
107 | 106 | } |
108 | 107 | |
206 | 205 | */ |
207 | 206 | static int LOnig_setdefaultsyntax (lua_State *L) { |
208 | 207 | (void)luaL_checkstring(L, 1); |
209 | onig_set_default_syntax(getsyntax(L, 1)); | |
208 | onig_set_default_syntax((OnigSyntaxType*) getsyntax(L, 1)); | |
210 | 209 | return 0; |
211 | 210 | } |
212 | 211 | |
286 | 285 | return gsub_exec(ud, argE, st); |
287 | 286 | } |
288 | 287 | |
288 | static int LOnig_capturecount (lua_State *L) { | |
289 | TOnig *ud = check_ud(L); | |
290 | lua_pushinteger(L, onig_number_of_captures(ud->reg)); | |
291 | return 1; | |
292 | } | |
293 | ||
289 | 294 | static int LOnig_gc (lua_State *L) { |
290 | 295 | TOnig *ud = check_ud (L); |
291 | 296 | if (ud->reg) { /* precaution against "manual" __gc calling */ |
313 | 318 | return 1; |
314 | 319 | } |
315 | 320 | |
321 | static int LOnig_internal_test (lua_State *L) { | |
322 | unsigned int i; | |
323 | for (i=1; i<sizeof(Encodings)/sizeof(Encodings[0]); i++) { | |
324 | if (fcmp(&Encodings[i-1], &Encodings[i]) >= 0) { | |
325 | lua_pushboolean(L, 0); | |
326 | lua_pushstring(L, "Array 'Encodings' is not properly sorted."); | |
327 | return 2; | |
328 | } | |
329 | } | |
330 | for (i=1; i<sizeof(Syntaxes)/sizeof(Syntaxes[0]); i++) { | |
331 | if (fcmp(&Syntaxes[i-1], &Syntaxes[i]) >= 0) { | |
332 | lua_pushboolean(L, 0); | |
333 | lua_pushstring(L, "Array 'Syntaxes' is not properly sorted."); | |
334 | return 2; | |
335 | } | |
336 | } | |
337 | lua_pushboolean(L, 1); | |
338 | return 1; | |
339 | } | |
340 | ||
316 | 341 | static const luaL_Reg r_methods[] = { |
317 | 342 | { "exec", algm_exec }, |
318 | 343 | { "tfind", algm_tfind }, /* old name: match */ |
319 | 344 | { "find", algm_find }, |
320 | 345 | { "match", algm_match }, |
346 | { "capturecount", LOnig_capturecount }, | |
321 | 347 | { "__gc", LOnig_gc }, |
322 | 348 | { "__tostring", LOnig_tostring }, |
323 | 349 | { NULL, NULL } |
328 | 354 | { "find", algf_find }, |
329 | 355 | { "gmatch", algf_gmatch }, |
330 | 356 | { "gsub", algf_gsub }, |
357 | { "count", algf_count }, | |
331 | 358 | { "split", algf_split }, |
332 | 359 | { "new", algf_new }, |
333 | 360 | { "flags", LOnig_get_flags }, |
334 | 361 | { "version", LOnig_version }, |
335 | 362 | { "setdefaultsyntax", LOnig_setdefaultsyntax }, |
363 | { "internal_test", LOnig_internal_test }, | |
336 | 364 | { NULL, NULL } |
337 | 365 | }; |
338 | 366 |
4 | 4 | #include <string.h> |
5 | 5 | #include <locale.h> |
6 | 6 | #include <ctype.h> |
7 | #include <stdint.h> | |
7 | 8 | #include <pcre.h> |
8 | 9 | |
9 | 10 | #include "lua.h" |
47 | 48 | lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n)) |
48 | 49 | |
49 | 50 | #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ |
50 | (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
51 | (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
51 | 52 | |
52 | 53 | #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1) |
53 | 54 | #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n)) |
56 | 57 | |
57 | 58 | #define ALG_BASE(st) 0 |
58 | 59 | #define ALG_PULL |
59 | #define ALG_USERETRY | |
60 | 60 | |
61 | 61 | typedef struct { |
62 | 62 | pcre * pr; |
85 | 85 | /* Functions |
86 | 86 | ****************************************************************************** |
87 | 87 | */ |
88 | ||
89 | static void push_chartables_meta (lua_State *L) { | |
90 | lua_pushinteger (L, INDEX_CHARTABLES_META); | |
91 | lua_rawget (L, ALG_ENVIRONINDEX); | |
92 | } | |
93 | 88 | |
94 | 89 | static int getcflags (lua_State *L, int pos) { |
95 | 90 | switch (lua_type (L, pos)) { |
131 | 126 | *ud = check_ud (L); |
132 | 127 | argE->text = luaL_checklstring (L, 2, &argE->textlen); |
133 | 128 | argE->startoffset = get_startoffset (L, 3, argE->textlen); |
134 | argE->eflags = luaL_optint (L, 4, ALG_EFLAGS_DFLT); | |
135 | argE->ovecsize = luaL_optint (L, 5, 100); | |
136 | argE->wscount = luaL_optint (L, 6, 50); | |
137 | } | |
138 | #endif | |
129 | argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT); | |
130 | argE->ovecsize = (size_t)luaL_optinteger (L, 5, 100); | |
131 | argE->wscount = (size_t)luaL_optinteger (L, 6, 50); | |
132 | } | |
133 | #endif | |
134 | ||
/* Pushes onto the Lua stack the value stored (raw access, no metamethods)
 * under the integer key INDEX_CHARTABLES_META in the table found at
 * ALG_ENVIRONINDEX. Judging by the name this is the metatable used for
 * chartables userdata - NOTE(review): confirm where that slot is filled. */
135 | static void push_chartables_meta (lua_State *L) { | |
136 | lua_pushinteger (L, INDEX_CHARTABLES_META); | |
137 | lua_rawget (L, ALG_ENVIRONINDEX); | |
138 | } | |
139 | 139 | |
140 | 140 | static int Lpcre_maketables (lua_State *L) { |
141 | 141 | *(const void**)lua_newuserdata (L, sizeof(void*)) = pcre_maketables(); |
168 | 168 | return 0; |
169 | 169 | } |
170 | 170 | |
171 | static int chartables_tostring (lua_State *L) { | |
172 | void **ud = check_chartables (L, 1); | |
173 | lua_pushfstring (L, "%s (%p)", chartables_typename, ud); | |
174 | return 1; | |
175 | } | |
176 | ||
171 | 177 | static void checkarg_compile (lua_State *L, int pos, TArgComp *argC) { |
172 | 178 | argC->locale = NULL; |
173 | 179 | argC->tables = NULL; |
176 | 182 | argC->locale = lua_tostring (L, pos); |
177 | 183 | else { |
178 | 184 | argC->tablespos = pos; |
179 | argC->tables = *check_chartables (L, pos); | |
185 | argC->tables = (const unsigned char*) *check_chartables (L, pos); | |
180 | 186 | } |
181 | 187 | } |
182 | 188 | } |
295 | 301 | } |
296 | 302 | #endif /* #if PCRE_MAJOR >= 6 */ |
297 | 303 | |
298 | #ifdef ALG_USERETRY | |
299 | static int gmatch_exec (TUserdata *ud, TArgExec *argE, int retry) { | |
300 | int eflags = retry ? (argE->eflags|PCRE_NOTEMPTY|PCRE_ANCHORED) : argE->eflags; | |
301 | return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, | |
302 | argE->startoffset, eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); | |
303 | } | |
304 | #else | |
305 | static int gmatch_exec (TUserdata *ud, TArgExec *argE) { | |
306 | return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, | |
307 | argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); | |
308 | } | |
309 | #endif | |
304 | static int gmatch_exec (TUserdata *ud, TArgExec *argE) { | |
305 | return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, | |
306 | argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); | |
307 | } | |
310 | 308 | |
311 | 309 | static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { |
312 | 310 | lua_pushlstring (L, argE->text, argE->textlen); |
317 | 315 | argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); |
318 | 316 | } |
319 | 317 | |
320 | #ifdef ALG_USERETRY | |
321 | static int gsub_exec (TPcre *ud, TArgExec *argE, int st, int retry) { | |
322 | int eflags = retry ? (argE->eflags|PCRE_NOTEMPTY|PCRE_ANCHORED) : argE->eflags; | |
323 | return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, | |
324 | st, eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); | |
325 | } | |
326 | #else | |
327 | static int gsub_exec (TPcre *ud, TArgExec *argE, int st) { | |
328 | return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, | |
329 | st, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); | |
330 | } | |
331 | #endif | |
318 | static int gsub_exec (TPcre *ud, TArgExec *argE, int st) { | |
319 | return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, | |
320 | st, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); | |
321 | } | |
332 | 322 | |
333 | 323 | static int split_exec (TPcre *ud, TArgExec *argE, int offset) { |
334 | 324 | return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, offset, |
356 | 346 | return 1; |
357 | 347 | } |
358 | 348 | |
359 | static int chartables_tostring (lua_State *L) { | |
360 | void **ud = check_chartables (L, 1); | |
361 | lua_pushfstring (L, "%s (%p)", chartables_typename, ud); | |
362 | return 1; | |
363 | } | |
364 | ||
365 | 349 | static int Lpcre_version (lua_State *L) { |
366 | 350 | lua_pushstring (L, pcre_version ()); |
351 | return 1; | |
352 | } | |
353 | ||
/* SET_INFO_FIELD(L, ud, what, name, valtype)
 * Asks pcre_fullinfo for item `what` of the compiled pattern in `ud`,
 * receiving it in a local of type `valtype`. When the call succeeds
 * (returns 0) the value is pushed as a Lua number and stored under key
 * `name` in the table on top of the stack; on failure the field is simply
 * left unset. Expands to a braced block, so call sites need no trailing
 * semicolon. */
354 | #define SET_INFO_FIELD(L,ud,what,name,valtype) { \ | |
355 | valtype val; \ | |
356 | if (0 == pcre_fullinfo (ud->pr, ud->extra, what, &val)) { \ | |
357 | lua_pushnumber (L, val); \ | |
358 | lua_setfield (L, -2, name); \ | |
359 | } \ | |
360 | } | |
361 | ||
362 | static int Lpcre_fullinfo (lua_State *L) { | |
363 | TPcre *ud = check_ud (L); | |
364 | lua_newtable(L); | |
365 | ||
366 | SET_INFO_FIELD (L, ud, PCRE_INFO_BACKREFMAX, "BACKREFMAX", int) | |
367 | SET_INFO_FIELD (L, ud, PCRE_INFO_CAPTURECOUNT, "CAPTURECOUNT", int) | |
368 | SET_INFO_FIELD (L, ud, PCRE_INFO_FIRSTBYTE, "FIRSTBYTE", int) | |
369 | SET_INFO_FIELD (L, ud, PCRE_INFO_HASCRORLF, "HASCRORLF", int) | |
370 | SET_INFO_FIELD (L, ud, PCRE_INFO_JCHANGED, "JCHANGED", int) | |
371 | #ifdef PCRE_INFO_JIT | |
372 | SET_INFO_FIELD (L, ud, PCRE_INFO_JIT, "JIT", int) | |
373 | #endif | |
374 | #ifdef PCRE_INFO_JITSIZE | |
375 | SET_INFO_FIELD (L, ud, PCRE_INFO_JITSIZE, "JITSIZE", size_t); | |
376 | #endif | |
377 | #ifdef PCRE_INFO_MATCH_EMPTY | |
378 | SET_INFO_FIELD (L, ud, PCRE_INFO_MATCH_EMPTY, "MATCH_EMPTY", int) | |
379 | #endif | |
380 | #ifdef PCRE_INFO_MATCHLIMIT | |
381 | SET_INFO_FIELD (L, ud, PCRE_INFO_MATCHLIMIT, "MATCHLIMIT", uint32_t) | |
382 | #endif | |
383 | #ifdef PCRE_INFO_MAXLOOKBEHIND | |
384 | SET_INFO_FIELD (L, ud, PCRE_INFO_MAXLOOKBEHIND, "MAXLOOKBEHIND", int) /* int ? */ | |
385 | #endif | |
386 | #ifdef PCRE_INFO_MINLENGTH | |
387 | SET_INFO_FIELD (L, ud, PCRE_INFO_MINLENGTH, "MINLENGTH", int) | |
388 | #endif | |
389 | SET_INFO_FIELD (L, ud, PCRE_INFO_OKPARTIAL, "OKPARTIAL", int) | |
390 | SET_INFO_FIELD (L, ud, PCRE_INFO_OPTIONS, "OPTIONS", unsigned long) | |
391 | #ifdef PCRE_INFO_RECURSIONLIMIT | |
392 | SET_INFO_FIELD (L, ud, PCRE_INFO_RECURSIONLIMIT, "RECURSIONLIMIT", uint32_t) | |
393 | #endif | |
394 | SET_INFO_FIELD (L, ud, PCRE_INFO_SIZE, "SIZE", size_t) | |
395 | SET_INFO_FIELD (L, ud, PCRE_INFO_STUDYSIZE, "STUDYSIZE", size_t) | |
396 | #ifdef PCRE_INFO_FIRSTCHARACTERFLAGS | |
397 | SET_INFO_FIELD (L, ud, PCRE_INFO_FIRSTCHARACTERFLAGS, "FIRSTCHARACTERFLAGS", int) | |
398 | #endif | |
399 | #ifdef PCRE_INFO_FIRSTCHARACTER | |
400 | SET_INFO_FIELD (L, ud, PCRE_INFO_FIRSTCHARACTER, "FIRSTCHARACTER", uint32_t) | |
401 | #endif | |
402 | #ifdef PCRE_INFO_REQUIREDCHARFLAGS | |
403 | SET_INFO_FIELD (L, ud, PCRE_INFO_REQUIREDCHARFLAGS, "REQUIREDCHARFLAGS", int) | |
404 | #endif | |
405 | #ifdef PCRE_INFO_REQUIREDCHAR | |
406 | SET_INFO_FIELD (L, ud, PCRE_INFO_REQUIREDCHAR, "REQUIREDCHAR", uint32_t) | |
407 | #endif | |
408 | ||
367 | 409 | return 1; |
368 | 410 | } |
369 | 411 | |
381 | 423 | #if PCRE_MAJOR >= 6 |
382 | 424 | { "dfa_exec", Lpcre_dfa_exec }, |
383 | 425 | #endif |
426 | { "fullinfo", Lpcre_fullinfo }, | |
384 | 427 | { "__gc", Lpcre_gc }, |
385 | 428 | { "__tostring", Lpcre_tostring }, |
386 | 429 | { NULL, NULL } |
391 | 434 | { "find", algf_find }, |
392 | 435 | { "gmatch", algf_gmatch }, |
393 | 436 | { "gsub", algf_gsub }, |
437 | { "count", algf_count }, | |
394 | 438 | { "split", algf_split }, |
395 | 439 | { "new", algf_new }, |
396 | 440 | { "flags", Lpcre_get_flags }, |
0 | /* lpcre2.c - Lua binding of PCRE2 library */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #include <stdlib.h> | |
4 | #include <string.h> | |
5 | #include <locale.h> | |
6 | #include <ctype.h> | |
7 | #include <stdint.h> | |
8 | #include <pcre2.h> | |
9 | ||
10 | #include "lua.h" | |
11 | #include "lauxlib.h" | |
12 | #include "../common.h" | |
13 | ||
14 | extern int Lpcre2_get_flags (lua_State *L); | |
15 | extern int Lpcre2_config (lua_State *L); | |
16 | extern flag_pair pcre2_error_flags[]; | |
17 | ||
18 | /* These 2 settings may be redefined from the command-line or the makefile. | |
19 | * They should be kept in sync between themselves and with the target name. | |
20 | */ | |
21 | #ifndef REX_LIBNAME | |
22 | # define REX_LIBNAME "rex_pcre2" | |
23 | #endif | |
24 | #ifndef REX_OPENLIB | |
25 | # define REX_OPENLIB luaopen_rex_pcre2 | |
26 | #endif | |
27 | ||
28 | #define REX_TYPENAME REX_LIBNAME"_regex" | |
29 | ||
30 | #define ALG_CFLAGS_DFLT 0 | |
31 | #define ALG_EFLAGS_DFLT 0 | |
32 | ||
33 | static int getcflags (lua_State *L, int pos); | |
34 | #define ALG_GETCFLAGS(L,pos) getcflags(L, pos) | |
35 | ||
36 | static void checkarg_compile (lua_State *L, int pos, TArgComp *argC); | |
37 | #define ALG_GETCARGS(a,b,c) checkarg_compile(a,b,c) | |
38 | ||
39 | #define ALG_NOMATCH(res) ((res) == PCRE2_ERROR_NOMATCH) | |
40 | #define ALG_ISMATCH(res) ((res) >= 0) | |
41 | #define ALG_SUBBEG(ud,n) ((int)(ud)->ovector[(n)+(n)]) | |
42 | #define ALG_SUBEND(ud,n) ((int)(ud)->ovector[(n)+(n)+1]) | |
43 | #define ALG_SUBLEN(ud,n) (ALG_SUBEND((ud),(n)) - ALG_SUBBEG((ud),(n))) | |
44 | #define ALG_SUBVALID(ud,n) (0 == pcre2_substring_length_bynumber((ud)->match_data, (n), NULL)) | |
45 | #define ALG_NSUB(ud) ((int)(ud)->ncapt) | |
46 | ||
47 | #define ALG_PUSHSUB(L,ud,text,n) \ | |
48 | lua_pushlstring (L, (text) + ALG_SUBBEG((ud),(n)), ALG_SUBLEN((ud),(n))) | |
49 | ||
50 | #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ | |
51 | (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
52 | ||
53 | #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1) | |
54 | #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n)) | |
55 | #define ALG_PUSHOFFSETS(L,ud,offs,n) \ | |
56 | (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n)) | |
57 | ||
58 | #define ALG_BASE(st) 0 | |
59 | #define ALG_PULL | |
60 | ||
61 | typedef struct { | |
62 | pcre2_code *pr; | |
63 | pcre2_compile_context *ccontext; | |
64 | pcre2_match_data *match_data; | |
65 | PCRE2_SIZE *ovector; | |
66 | int ncapt; | |
67 | const unsigned char *tables; | |
68 | int freed; | |
69 | } TPcre2; | |
70 | ||
71 | #define TUserdata TPcre2 | |
72 | ||
73 | static void do_named_subpatterns (lua_State *L, TPcre2 *ud, const char *text); | |
74 | # define DO_NAMED_SUBPATTERNS do_named_subpatterns | |
75 | ||
76 | #include "../algo.h" | |
77 | ||
78 | /* Locations of the 2 permanent tables in the function environment */ | |
79 | #define INDEX_CHARTABLES_META 1 /* chartables type's metatable */ | |
80 | #define INDEX_CHARTABLES_LINK 2 /* link chartables to compiled regex */ | |
81 | ||
82 | const char chartables_typename[] = "chartables"; | |
83 | ||
84 | /* Functions | |
85 | ****************************************************************************** | |
86 | */ | |
87 | ||
88 | static int push_error_message (lua_State *L, int errorcode) //### is this function needed? | |
89 | { | |
90 | PCRE2_UCHAR buf[256]; | |
91 | if (pcre2_get_error_message(errorcode, buf, 256) > 0) | |
92 | { | |
93 | lua_pushstring(L, (const char*)buf); | |
94 | return 1; | |
95 | } | |
96 | return 0; | |
97 | } | |
98 | ||
99 | static int getcflags (lua_State *L, int pos) { | |
100 | switch (lua_type (L, pos)) { | |
101 | case LUA_TNONE: | |
102 | case LUA_TNIL: | |
103 | return ALG_CFLAGS_DFLT; | |
104 | case LUA_TNUMBER: | |
105 | return lua_tointeger (L, pos); | |
106 | case LUA_TSTRING: { | |
107 | const char *s = lua_tostring (L, pos); | |
108 | int res = 0, ch; | |
109 | while ((ch = *s++) != '\0') { | |
110 | if (ch == 'i') res |= PCRE2_CASELESS; | |
111 | else if (ch == 'm') res |= PCRE2_MULTILINE; | |
112 | else if (ch == 's') res |= PCRE2_DOTALL; | |
113 | else if (ch == 'x') res |= PCRE2_EXTENDED; | |
114 | else if (ch == 'U') res |= PCRE2_UNGREEDY; | |
115 | //else if (ch == 'X') res |= PCRE2_EXTRA; //### does not exist in PCRE2 -> reflect in manual | |
116 | } | |
117 | return res; | |
118 | } | |
119 | default: | |
120 | return luaL_typerror (L, pos, "number or string"); | |
121 | } | |
122 | } | |
123 | ||
124 | static int generate_error (lua_State *L, const TPcre2 *ud, int errcode) { | |
125 | const char *key = get_flag_key (pcre2_error_flags, errcode); | |
126 | (void) ud; | |
127 | if (key) | |
128 | return luaL_error (L, "error PCRE2_%s", key); | |
129 | else | |
130 | return luaL_error (L, "PCRE2 error code %d", errcode); | |
131 | } | |
132 | ||
133 | /* method r:dfa_exec (s, [st], [ef], [ovecsize], [wscount]) */ | |
134 | static void checkarg_dfa_exec (lua_State *L, TArgExec *argE, TPcre2 **ud) { | |
135 | *ud = check_ud (L); | |
136 | argE->text = luaL_checklstring (L, 2, &argE->textlen); | |
137 | argE->startoffset = get_startoffset (L, 3, argE->textlen); | |
138 | argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT); | |
139 | argE->ovecsize = (size_t)luaL_optinteger (L, 5, 100); | |
140 | argE->wscount = (size_t)luaL_optinteger (L, 6, 50); | |
141 | } | |
142 | ||
143 | static void push_chartables_meta (lua_State *L) { | |
144 | lua_pushinteger (L, INDEX_CHARTABLES_META); | |
145 | lua_rawget (L, ALG_ENVIRONINDEX); | |
146 | } | |
147 | ||
148 | static int Lpcre2_maketables (lua_State *L) { | |
149 | *(const void**)lua_newuserdata (L, sizeof(void*)) = pcre2_maketables(NULL); //### argument NULL | |
150 | push_chartables_meta (L); | |
151 | lua_setmetatable (L, -2); | |
152 | return 1; | |
153 | } | |
154 | ||
155 | static void **check_chartables (lua_State *L, int pos) { | |
156 | void **q; | |
157 | /* Compare the metatable against the C function environment. */ | |
158 | if (lua_getmetatable(L, pos)) { | |
159 | push_chartables_meta (L); | |
160 | if (lua_rawequal(L, -1, -2) && | |
161 | (q = (void **)lua_touserdata(L, pos)) != NULL) { | |
162 | lua_pop(L, 2); | |
163 | return q; | |
164 | } | |
165 | } | |
166 | luaL_argerror(L, pos, lua_pushfstring (L, "not a %s", chartables_typename)); | |
167 | return NULL; | |
168 | } | |
169 | ||
170 | static int chartables_gc (lua_State *L) { | |
171 | void **ud = check_chartables (L, 1); | |
172 | if (*ud) { | |
173 | free (*ud); //### free() should be called only if pcre2_maketables was called with NULL argument | |
174 | *ud = NULL; | |
175 | } | |
176 | return 0; | |
177 | } | |
178 | ||
179 | static int chartables_tostring (lua_State *L) { | |
180 | void **ud = check_chartables (L, 1); | |
181 | lua_pushfstring (L, "%s (%p)", chartables_typename, ud); | |
182 | return 1; | |
183 | } | |
184 | ||
185 | static void checkarg_compile (lua_State *L, int pos, TArgComp *argC) { | |
186 | argC->locale = NULL; | |
187 | argC->tables = NULL; | |
188 | if (!lua_isnoneornil (L, pos)) { | |
189 | if (lua_isstring (L, pos)) | |
190 | argC->locale = lua_tostring (L, pos); | |
191 | else { | |
192 | argC->tablespos = pos; | |
193 | argC->tables = (const unsigned char*) *check_chartables (L, pos); | |
194 | } | |
195 | } | |
196 | } | |
197 | ||
198 | static int compile_regex (lua_State *L, const TArgComp *argC, TPcre2 **pud) { | |
199 | int errcode; | |
200 | PCRE2_SIZE erroffset; | |
201 | TPcre2 *ud; | |
202 | ||
203 | ud = (TPcre2*)lua_newuserdata (L, sizeof (TPcre2)); | |
204 | memset (ud, 0, sizeof (TPcre2)); /* initialize all members to 0 */ | |
205 | lua_pushvalue (L, ALG_ENVIRONINDEX); | |
206 | lua_setmetatable (L, -2); | |
207 | ||
208 | ud->ccontext = pcre2_compile_context_create(NULL); | |
209 | if (ud->ccontext == NULL) | |
210 | return luaL_error (L, "malloc failed"); | |
211 | ||
212 | if (argC->locale) { | |
213 | char old_locale[256]; | |
214 | strcpy (old_locale, setlocale (LC_CTYPE, NULL)); /* store the locale */ | |
215 | if (NULL == setlocale (LC_CTYPE, argC->locale)) /* set new locale */ | |
216 | return luaL_error (L, "cannot set locale"); | |
217 | ud->tables = pcre2_maketables (NULL); /* make tables with new locale */ //### argument NULL | |
218 | pcre2_set_character_tables(ud->ccontext, ud->tables); | |
219 | setlocale (LC_CTYPE, old_locale); /* restore the old locale */ | |
220 | } | |
221 | else if (argC->tables) { | |
222 | pcre2_set_character_tables(ud->ccontext, argC->tables); | |
223 | lua_pushinteger (L, INDEX_CHARTABLES_LINK); | |
224 | lua_rawget (L, ALG_ENVIRONINDEX); | |
225 | lua_pushvalue (L, -2); | |
226 | lua_pushvalue (L, argC->tablespos); | |
227 | lua_rawset (L, -3); | |
228 | lua_pop (L, 1); | |
229 | } | |
230 | ||
231 | ud->pr = pcre2_compile ((PCRE2_SPTR)argC->pattern, argC->patlen, argC->cflags, &errcode, | |
232 | &erroffset, ud->ccontext); //### DOUBLE-CHECK ALL ARGUMENTS | |
233 | if (!ud->pr) { | |
234 | if (push_error_message(L, errcode)) | |
235 | return luaL_error (L, "%s (pattern offset: %d)", lua_tostring(L,-1), erroffset + 1); | |
236 | else | |
237 | return luaL_error (L, "%s (pattern offset: %d)", "pattern compile error", erroffset + 1); | |
238 | } | |
239 | ||
240 | if (0 != pcre2_pattern_info (ud->pr, PCRE2_INFO_CAPTURECOUNT, &ud->ncapt)) //### | |
241 | return luaL_error (L, "could not get pattern info"); | |
242 | ||
243 | /* need (2 ints per capture, plus one for substring match) * 3/2 */ | |
244 | ud->match_data = pcre2_match_data_create(ud->ncapt+1, NULL); //### CHECK ALL | |
245 | if (!ud->match_data) | |
246 | return luaL_error (L, "malloc failed"); | |
247 | ||
248 | ud->ovector = pcre2_get_ovector_pointer(ud->match_data); | |
249 | ||
250 | if (pud) *pud = ud; | |
251 | return 1; | |
252 | } | |
253 | ||
254 | /* the target table must be on lua stack top */ | |
255 | static void do_named_subpatterns (lua_State *L, TPcre2 *ud, const char *text) { | |
256 | int i, namecount, name_entry_size; | |
257 | unsigned char *name_table; | |
258 | PCRE2_SPTR tabptr; | |
259 | ||
260 | /* do named subpatterns - NJG */ | |
261 | pcre2_pattern_info (ud->pr, PCRE2_INFO_NAMECOUNT, &namecount); | |
262 | if (namecount <= 0) | |
263 | return; | |
264 | pcre2_pattern_info (ud->pr, PCRE2_INFO_NAMETABLE, &name_table); | |
265 | pcre2_pattern_info (ud->pr, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size); | |
266 | tabptr = name_table; | |
267 | for (i = 0; i < namecount; i++) { | |
268 | int n = (tabptr[0] << 8) | tabptr[1]; /* number of the capturing parenthesis */ | |
269 | if (n > 0 && n <= ALG_NSUB(ud)) { /* check range */ | |
270 | lua_pushstring (L, (char *)tabptr + 2); /* name of the capture, zero terminated */ | |
271 | ALG_PUSHSUB_OR_FALSE (L, ud, text, n); | |
272 | lua_rawset (L, -3); | |
273 | } | |
274 | tabptr += name_entry_size; | |
275 | } | |
276 | } | |
277 | ||
278 | static int Lpcre2_dfa_exec (lua_State *L) | |
279 | { | |
280 | TArgExec argE; | |
281 | TPcre2 *ud; | |
282 | int res; | |
283 | int *wspace; | |
284 | size_t wsize; | |
285 | ||
286 | checkarg_dfa_exec (L, &argE, &ud); | |
287 | wsize = argE.wscount * sizeof(int); | |
288 | wspace = (int*) Lmalloc (L, wsize); | |
289 | if (!wspace) | |
290 | luaL_error (L, "malloc failed"); | |
291 | ||
292 | ud->match_data = pcre2_match_data_create(argE.ovecsize/2, NULL); //### CHECK ALL | |
293 | if (!ud->match_data) | |
294 | return luaL_error (L, "malloc failed"); | |
295 | ||
296 | res = pcre2_dfa_match (ud->pr, (PCRE2_SPTR)argE.text, argE.textlen, argE.startoffset, | |
297 | argE.eflags, ud->match_data, NULL, wspace, argE.wscount); //### CHECK ALL | |
298 | ||
299 | if (ALG_ISMATCH (res) || res == PCRE2_ERROR_PARTIAL) { | |
300 | int i; | |
301 | int max = (res>0) ? res : (res==0) ? (int)argE.ovecsize/2 : 1; | |
302 | PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(ud->match_data); | |
303 | ||
304 | lua_pushinteger (L, ovector[0] + 1); /* 1-st return value */ | |
305 | lua_newtable (L); /* 2-nd return value */ | |
306 | for (i=0; i<max; i++) { | |
307 | lua_pushinteger (L, ovector[i+i+1]); | |
308 | lua_rawseti (L, -2, i+1); | |
309 | } | |
310 | lua_pushinteger (L, res); /* 3-rd return value */ | |
311 | Lfree (L, wspace, wsize); | |
312 | return 3; | |
313 | } | |
314 | else { | |
315 | Lfree (L, wspace, wsize); | |
316 | if (ALG_NOMATCH (res)) | |
317 | return lua_pushnil (L), 1; | |
318 | else | |
319 | return generate_error (L, ud, res); | |
320 | } | |
321 | } | |
322 | ||
323 | static int gmatch_exec (TUserdata *ud, TArgExec *argE) { | |
324 | return pcre2_match (ud->pr, (PCRE2_SPTR)argE->text, argE->textlen, | |
325 | argE->startoffset, argE->eflags, ud->match_data, NULL); //### | |
326 | } | |
327 | ||
328 | static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { | |
329 | lua_pushlstring (L, argE->text, argE->textlen); | |
330 | } | |
331 | ||
332 | static int findmatch_exec (TPcre2 *ud, TArgExec *argE) { | |
333 | return pcre2_match (ud->pr, (PCRE2_SPTR)argE->text, argE->textlen, | |
334 | argE->startoffset, argE->eflags, ud->match_data, NULL); //### | |
335 | } | |
336 | ||
337 | static int gsub_exec (TPcre2 *ud, TArgExec *argE, int st) { | |
338 | return pcre2_match (ud->pr, (PCRE2_SPTR)argE->text, argE->textlen, | |
339 | st, argE->eflags, ud->match_data, NULL); //### | |
340 | } | |
341 | ||
342 | static int split_exec (TPcre2 *ud, TArgExec *argE, int offset) { | |
343 | return pcre2_match (ud->pr, (PCRE2_SPTR)argE->text, argE->textlen, | |
344 | offset, argE->eflags, ud->match_data, NULL); //### | |
345 | } | |
346 | ||
347 | static int Lpcre2_gc (lua_State *L) { | |
348 | TPcre2 *ud = check_ud (L); | |
349 | if (ud->freed == 0) { /* precaution against "manual" __gc calling */ | |
350 | ud->freed = 1; | |
351 | if (ud->pr) pcre2_code_free (ud->pr); | |
352 | //if (ud->tables) pcre_free ((void *)ud->tables); //### | |
353 | if (ud->ccontext) pcre2_compile_context_free (ud->ccontext); | |
354 | if (ud->match_data) pcre2_match_data_free (ud->match_data); | |
355 | } | |
356 | return 0; | |
357 | } | |
358 | ||
359 | static int Lpcre2_tostring (lua_State *L) { | |
360 | TPcre2 *ud = check_ud (L); | |
361 | if (ud->freed == 0) | |
362 | lua_pushfstring (L, "%s (%p)", REX_TYPENAME, (void*)ud); | |
363 | else | |
364 | lua_pushfstring (L, "%s (deleted)", REX_TYPENAME); | |
365 | return 1; | |
366 | } | |
367 | ||
368 | static int Lpcre2_version (lua_State *L) { | |
369 | char buf[64]; | |
370 | pcre2_config(PCRE2_CONFIG_VERSION, buf); | |
371 | lua_pushstring (L, buf); | |
372 | return 1; | |
373 | } | |
374 | ||
375 | //### TODO: document this method. | |
376 | //### TODO: write tests for this method. | |
377 | static int Lpcre2_jit_compile (lua_State *L) { | |
378 | TPcre2 *ud = check_ud (L); | |
379 | uint32_t options = (uint32_t) luaL_optinteger (L, 2, PCRE2_JIT_COMPLETE); | |
380 | int errcode = pcre2_jit_compile (ud->pr, options); | |
381 | if (errcode == 0) { | |
382 | lua_pushboolean(L, 1); | |
383 | return 1; | |
384 | } | |
385 | lua_pushboolean(L, 0); | |
386 | return 1 + push_error_message(L, errcode); | |
387 | } | |
388 | ||
389 | #define SET_INFO_FIELD(L,ud,what,name,valtype) { \ | |
390 | valtype val; \ | |
391 | if (0 == pcre2_pattern_info (ud->pr, what, &val)) { \ | |
392 | lua_pushnumber (L, val); \ | |
393 | lua_setfield (L, -2, name); \ | |
394 | } \ | |
395 | } | |
396 | ||
397 | static int Lpcre2_pattern_info (lua_State *L) { | |
398 | TPcre2 *ud = check_ud (L); | |
399 | lua_newtable(L); | |
400 | ||
401 | SET_INFO_FIELD (L, ud, PCRE2_INFO_ALLOPTIONS, "ALLOPTIONS", uint32_t) | |
402 | SET_INFO_FIELD (L, ud, PCRE2_INFO_ARGOPTIONS, "ARGOPTIONS", uint32_t) | |
403 | SET_INFO_FIELD (L, ud, PCRE2_INFO_BACKREFMAX, "BACKREFMAX", uint32_t) | |
404 | SET_INFO_FIELD (L, ud, PCRE2_INFO_BSR, "BSR", uint32_t) | |
405 | SET_INFO_FIELD (L, ud, PCRE2_INFO_CAPTURECOUNT, "CAPTURECOUNT", uint32_t) | |
406 | //### SET_INFO_FIELD (L, ud, PCRE2_INFO_FIRSTBITMAP, "FIRSTBITMAP", ???) | |
407 | SET_INFO_FIELD (L, ud, PCRE2_INFO_FIRSTCODETYPE, "FIRSTCODETYPE", uint32_t) | |
408 | SET_INFO_FIELD (L, ud, PCRE2_INFO_FIRSTCODEUNIT, "FIRSTCODEUNIT", uint32_t) | |
409 | SET_INFO_FIELD (L, ud, PCRE2_INFO_HASBACKSLASHC, "HASBACKSLASHC", uint32_t) | |
410 | SET_INFO_FIELD (L, ud, PCRE2_INFO_HASCRORLF, "HASCRORLF", uint32_t) | |
411 | SET_INFO_FIELD (L, ud, PCRE2_INFO_JCHANGED, "JCHANGED", uint32_t) | |
412 | SET_INFO_FIELD (L, ud, PCRE2_INFO_JITSIZE, "JITSIZE", size_t) | |
413 | SET_INFO_FIELD (L, ud, PCRE2_INFO_LASTCODETYPE, "LASTCODETYPE", uint32_t) | |
414 | SET_INFO_FIELD (L, ud, PCRE2_INFO_LASTCODEUNIT, "LASTCODEUNIT", uint32_t) | |
415 | SET_INFO_FIELD (L, ud, PCRE2_INFO_MATCHEMPTY, "MATCHEMPTY", uint32_t) | |
416 | SET_INFO_FIELD (L, ud, PCRE2_INFO_MATCHLIMIT, "MATCHLIMIT", uint32_t) | |
417 | SET_INFO_FIELD (L, ud, PCRE2_INFO_MAXLOOKBEHIND, "MAXLOOKBEHIND", uint32_t) | |
418 | SET_INFO_FIELD (L, ud, PCRE2_INFO_MINLENGTH, "MINLENGTH", uint32_t) | |
419 | SET_INFO_FIELD (L, ud, PCRE2_INFO_NAMECOUNT, "NAMECOUNT", uint32_t) | |
420 | SET_INFO_FIELD (L, ud, PCRE2_INFO_NAMEENTRYSIZE, "NAMEENTRYSIZE", uint32_t) | |
421 | //### SET_INFO_FIELD (L, ud, PCRE2_INFO_NAMETABLE, "NAMETABLE", ???) | |
422 | SET_INFO_FIELD (L, ud, PCRE2_INFO_NEWLINE, "NEWLINE", uint32_t) | |
423 | SET_INFO_FIELD (L, ud, PCRE2_INFO_RECURSIONLIMIT, "RECURSIONLIMIT", uint32_t) | |
424 | SET_INFO_FIELD (L, ud, PCRE2_INFO_SIZE, "SIZE", size_t) | |
425 | ||
426 | return 1; | |
427 | } | |
428 | ||
429 | static const luaL_Reg chartables_meta[] = { | |
430 | { "__gc", chartables_gc }, | |
431 | { "__tostring", chartables_tostring }, | |
432 | { NULL, NULL } | |
433 | }; | |
434 | ||
435 | static const luaL_Reg r_methods[] = { | |
436 | { "exec", algm_exec }, | |
437 | { "tfind", algm_tfind }, /* old name: match */ | |
438 | { "find", algm_find }, | |
439 | { "match", algm_match }, | |
440 | { "dfa_exec", Lpcre2_dfa_exec }, | |
441 | { "patterninfo", Lpcre2_pattern_info }, //### document name change: fullinfo -> patterninfo | |
442 | { "fullinfo", Lpcre2_pattern_info }, //### compatibility name | |
443 | { "jit_compile", Lpcre2_jit_compile }, | |
444 | { "__gc", Lpcre2_gc }, | |
445 | { "__tostring", Lpcre2_tostring }, | |
446 | { NULL, NULL } | |
447 | }; | |
448 | ||
449 | static const luaL_Reg r_functions[] = { | |
450 | { "match", algf_match }, | |
451 | { "find", algf_find }, | |
452 | { "gmatch", algf_gmatch }, | |
453 | { "gsub", algf_gsub }, | |
454 | { "count", algf_count }, | |
455 | { "split", algf_split }, | |
456 | { "new", algf_new }, | |
457 | { "flags", Lpcre2_get_flags }, | |
458 | { "version", Lpcre2_version }, | |
459 | { "maketables", Lpcre2_maketables }, | |
460 | { "config", Lpcre2_config }, | |
461 | { NULL, NULL } | |
462 | }; | |
463 | ||
464 | /* Open the library */ | |
465 | REX_API int REX_OPENLIB (lua_State *L) { | |
466 | char buf_ver[64]; | |
467 | pcre2_config(PCRE2_CONFIG_VERSION, buf_ver); | |
468 | if (PCRE2_MAJOR > atoi (buf_ver)) { | |
469 | return luaL_error (L, "%s requires at least version %d of PCRE2 library", | |
470 | REX_LIBNAME, (int)PCRE2_MAJOR); | |
471 | } | |
472 | ||
473 | alg_register(L, r_methods, r_functions, "PCRE2"); | |
474 | ||
475 | /* create a table and register it as a metatable for "chartables" userdata */ | |
476 | lua_newtable (L); | |
477 | lua_pushliteral (L, "access denied"); | |
478 | lua_setfield (L, -2, "__metatable"); | |
479 | #if LUA_VERSION_NUM == 501 | |
480 | luaL_register (L, NULL, chartables_meta); | |
481 | lua_rawseti (L, LUA_ENVIRONINDEX, INDEX_CHARTABLES_META); | |
482 | #else | |
483 | lua_pushvalue(L, -3); | |
484 | luaL_setfuncs (L, chartables_meta, 1); | |
485 | lua_rawseti (L, -3, INDEX_CHARTABLES_META); | |
486 | #endif | |
487 | ||
488 | /* create a table for connecting "chartables" userdata to "regex" userdata */ | |
489 | lua_newtable (L); | |
490 | lua_pushliteral (L, "k"); /* weak keys */ | |
491 | lua_setfield (L, -2, "__mode"); | |
492 | lua_pushvalue (L, -1); /* setmetatable (tb, tb) */ | |
493 | lua_setmetatable (L, -2); | |
494 | #if LUA_VERSION_NUM == 501 | |
495 | lua_rawseti (L, LUA_ENVIRONINDEX, INDEX_CHARTABLES_LINK); | |
496 | #else | |
497 | lua_rawseti (L, -3, INDEX_CHARTABLES_LINK); | |
498 | #endif | |
499 | ||
500 | return 1; | |
501 | } |
0 | /* lpcre2_f.c - Lua binding of PCRE2 library */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #include <pcre2.h> | |
4 | #include "lua.h" | |
5 | #include "lauxlib.h" | |
6 | #include "../common.h" | |
7 | ||
8 | #define VERSION_PCRE2 (PCRE2_MAJOR*100 + PCRE2_MINOR) | |
9 | ||
10 | static flag_pair pcre2_flags[] = { | |
11 | { "MAJOR", PCRE2_MAJOR }, | |
12 | { "MINOR", PCRE2_MINOR }, | |
13 | /*---------------------------------------------------------------------------*/ | |
14 | { "ANCHORED", PCRE2_ANCHORED }, | |
15 | { "NO_UTF_CHECK", PCRE2_NO_UTF_CHECK }, | |
16 | { "ALLOW_EMPTY_CLASS", PCRE2_ALLOW_EMPTY_CLASS }, | |
17 | { "ALT_BSUX", PCRE2_ALT_BSUX }, | |
18 | { "AUTO_CALLOUT", PCRE2_AUTO_CALLOUT }, | |
19 | { "CASELESS", PCRE2_CASELESS }, | |
20 | { "DOLLAR_ENDONLY", PCRE2_DOLLAR_ENDONLY }, | |
21 | { "DOTALL", PCRE2_DOTALL }, | |
22 | { "DUPNAMES", PCRE2_DUPNAMES }, | |
23 | { "EXTENDED", PCRE2_EXTENDED }, | |
24 | { "FIRSTLINE", PCRE2_FIRSTLINE }, | |
25 | { "MATCH_UNSET_BACKREF", PCRE2_MATCH_UNSET_BACKREF }, | |
26 | { "MULTILINE", PCRE2_MULTILINE }, | |
27 | { "NEVER_UCP", PCRE2_NEVER_UCP }, | |
28 | { "NEVER_UTF", PCRE2_NEVER_UTF }, | |
29 | { "NO_AUTO_CAPTURE", PCRE2_NO_AUTO_CAPTURE }, | |
30 | { "NO_AUTO_POSSESS", PCRE2_NO_AUTO_POSSESS }, | |
31 | { "NO_DOTSTAR_ANCHOR", PCRE2_NO_DOTSTAR_ANCHOR }, | |
32 | { "NO_START_OPTIMIZE", PCRE2_NO_START_OPTIMIZE }, | |
33 | { "UCP", PCRE2_UCP }, | |
34 | { "UNGREEDY", PCRE2_UNGREEDY }, | |
35 | { "UTF", PCRE2_UTF }, | |
36 | { "NEVER_BACKSLASH_C", PCRE2_NEVER_BACKSLASH_C }, | |
37 | { "ALT_CIRCUMFLEX", PCRE2_ALT_CIRCUMFLEX }, | |
38 | { "ALT_VERBNAMES", PCRE2_ALT_VERBNAMES }, | |
39 | { "USE_OFFSET_LIMIT", PCRE2_USE_OFFSET_LIMIT }, | |
40 | { "JIT_COMPLETE", PCRE2_JIT_COMPLETE }, | |
41 | { "JIT_PARTIAL_SOFT", PCRE2_JIT_PARTIAL_SOFT }, | |
42 | { "JIT_PARTIAL_HARD", PCRE2_JIT_PARTIAL_HARD }, | |
43 | { "NOTBOL", PCRE2_NOTBOL }, | |
44 | { "NOTEOL", PCRE2_NOTEOL }, | |
45 | { "NOTEMPTY", PCRE2_NOTEMPTY }, | |
46 | { "NOTEMPTY_ATSTART", PCRE2_NOTEMPTY_ATSTART }, | |
47 | { "PARTIAL_SOFT", PCRE2_PARTIAL_SOFT }, | |
48 | { "PARTIAL_HARD", PCRE2_PARTIAL_HARD }, | |
49 | { "DFA_RESTART", PCRE2_DFA_RESTART }, | |
50 | { "DFA_SHORTEST", PCRE2_DFA_SHORTEST }, | |
51 | { "SUBSTITUTE_GLOBAL", PCRE2_SUBSTITUTE_GLOBAL }, | |
52 | { "SUBSTITUTE_EXTENDED", PCRE2_SUBSTITUTE_EXTENDED }, | |
53 | { "SUBSTITUTE_UNSET_EMPTY", PCRE2_SUBSTITUTE_UNSET_EMPTY }, | |
54 | { "SUBSTITUTE_UNKNOWN_UNSET", PCRE2_SUBSTITUTE_UNKNOWN_UNSET }, | |
55 | { "SUBSTITUTE_OVERFLOW_LENGTH", PCRE2_SUBSTITUTE_OVERFLOW_LENGTH }, | |
56 | #ifdef PCRE2_NO_JIT | |
57 | { "NO_JIT", PCRE2_NO_JIT }, | |
58 | #endif | |
59 | { "NEWLINE_CR", PCRE2_NEWLINE_CR }, | |
60 | { "NEWLINE_LF", PCRE2_NEWLINE_LF }, | |
61 | { "NEWLINE_CRLF", PCRE2_NEWLINE_CRLF }, | |
62 | { "NEWLINE_ANY", PCRE2_NEWLINE_ANY }, | |
63 | { "NEWLINE_ANYCRLF", PCRE2_NEWLINE_ANYCRLF }, | |
64 | { "BSR_UNICODE", PCRE2_BSR_UNICODE }, | |
65 | { "BSR_ANYCRLF", PCRE2_BSR_ANYCRLF }, | |
66 | /*---------------------------------------------------------------------------*/ | |
67 | { "INFO_ALLOPTIONS", PCRE2_INFO_ALLOPTIONS }, | |
68 | { "INFO_ARGOPTIONS", PCRE2_INFO_ARGOPTIONS }, | |
69 | { "INFO_BACKREFMAX", PCRE2_INFO_BACKREFMAX }, | |
70 | { "INFO_BSR", PCRE2_INFO_BSR }, | |
71 | { "INFO_CAPTURECOUNT", PCRE2_INFO_CAPTURECOUNT }, | |
72 | { "INFO_FIRSTCODEUNIT", PCRE2_INFO_FIRSTCODEUNIT }, | |
73 | { "INFO_FIRSTCODETYPE", PCRE2_INFO_FIRSTCODETYPE }, | |
74 | { "INFO_FIRSTBITMAP", PCRE2_INFO_FIRSTBITMAP }, | |
75 | { "INFO_HASCRORLF", PCRE2_INFO_HASCRORLF }, | |
76 | { "INFO_JCHANGED", PCRE2_INFO_JCHANGED }, | |
77 | { "INFO_JITSIZE", PCRE2_INFO_JITSIZE }, | |
78 | { "INFO_LASTCODEUNIT", PCRE2_INFO_LASTCODEUNIT }, | |
79 | { "INFO_LASTCODETYPE", PCRE2_INFO_LASTCODETYPE }, | |
80 | { "INFO_MATCHEMPTY", PCRE2_INFO_MATCHEMPTY }, | |
81 | { "INFO_MATCHLIMIT", PCRE2_INFO_MATCHLIMIT }, | |
82 | { "INFO_MAXLOOKBEHIND", PCRE2_INFO_MAXLOOKBEHIND }, | |
83 | { "INFO_MINLENGTH", PCRE2_INFO_MINLENGTH }, | |
84 | { "INFO_NAMECOUNT", PCRE2_INFO_NAMECOUNT }, | |
85 | { "INFO_NAMEENTRYSIZE", PCRE2_INFO_NAMEENTRYSIZE }, | |
86 | { "INFO_NAMETABLE", PCRE2_INFO_NAMETABLE }, | |
87 | { "INFO_NEWLINE", PCRE2_INFO_NEWLINE }, | |
88 | { "INFO_RECURSIONLIMIT", PCRE2_INFO_RECURSIONLIMIT }, | |
89 | { "INFO_SIZE", PCRE2_INFO_SIZE }, | |
90 | { "INFO_HASBACKSLASHC", PCRE2_INFO_HASBACKSLASHC }, | |
91 | /*---------------------------------------------------------------------------*/ | |
92 | { NULL, 0 } | |
93 | }; | |
94 | ||
95 | flag_pair pcre2_error_flags[] = { | |
96 | { "ERROR_NOMATCH", PCRE2_ERROR_NOMATCH }, | |
97 | { "ERROR_PARTIAL", PCRE2_ERROR_PARTIAL }, | |
98 | { "ERROR_UTF8_ERR1", PCRE2_ERROR_UTF8_ERR1 }, | |
99 | { "ERROR_UTF8_ERR2", PCRE2_ERROR_UTF8_ERR2 }, | |
100 | { "ERROR_UTF8_ERR3", PCRE2_ERROR_UTF8_ERR3 }, | |
101 | { "ERROR_UTF8_ERR4", PCRE2_ERROR_UTF8_ERR4 }, | |
102 | { "ERROR_UTF8_ERR5", PCRE2_ERROR_UTF8_ERR5 }, | |
103 | { "ERROR_UTF8_ERR6", PCRE2_ERROR_UTF8_ERR6 }, | |
104 | { "ERROR_UTF8_ERR7", PCRE2_ERROR_UTF8_ERR7 }, | |
105 | { "ERROR_UTF8_ERR8", PCRE2_ERROR_UTF8_ERR8 }, | |
106 | { "ERROR_UTF8_ERR9", PCRE2_ERROR_UTF8_ERR9 }, | |
107 | { "ERROR_UTF8_ERR10", PCRE2_ERROR_UTF8_ERR10 }, | |
108 | { "ERROR_UTF8_ERR11", PCRE2_ERROR_UTF8_ERR11 }, | |
109 | { "ERROR_UTF8_ERR12", PCRE2_ERROR_UTF8_ERR12 }, | |
110 | { "ERROR_UTF8_ERR13", PCRE2_ERROR_UTF8_ERR13 }, | |
111 | { "ERROR_UTF8_ERR14", PCRE2_ERROR_UTF8_ERR14 }, | |
112 | { "ERROR_UTF8_ERR15", PCRE2_ERROR_UTF8_ERR15 }, | |
113 | { "ERROR_UTF8_ERR16", PCRE2_ERROR_UTF8_ERR16 }, | |
114 | { "ERROR_UTF8_ERR17", PCRE2_ERROR_UTF8_ERR17 }, | |
115 | { "ERROR_UTF8_ERR18", PCRE2_ERROR_UTF8_ERR18 }, | |
116 | { "ERROR_UTF8_ERR19", PCRE2_ERROR_UTF8_ERR19 }, | |
117 | { "ERROR_UTF8_ERR20", PCRE2_ERROR_UTF8_ERR20 }, | |
118 | { "ERROR_UTF8_ERR21", PCRE2_ERROR_UTF8_ERR21 }, | |
119 | { "ERROR_UTF16_ERR1", PCRE2_ERROR_UTF16_ERR1 }, | |
120 | { "ERROR_UTF16_ERR2", PCRE2_ERROR_UTF16_ERR2 }, | |
121 | { "ERROR_UTF16_ERR3", PCRE2_ERROR_UTF16_ERR3 }, | |
122 | { "ERROR_UTF32_ERR1", PCRE2_ERROR_UTF32_ERR1 }, | |
123 | { "ERROR_UTF32_ERR2", PCRE2_ERROR_UTF32_ERR2 }, | |
124 | { "ERROR_BADDATA", PCRE2_ERROR_BADDATA }, | |
125 | { "ERROR_MIXEDTABLES", PCRE2_ERROR_MIXEDTABLES }, | |
126 | { "ERROR_BADMAGIC", PCRE2_ERROR_BADMAGIC }, | |
127 | { "ERROR_BADMODE", PCRE2_ERROR_BADMODE }, | |
128 | { "ERROR_BADOFFSET", PCRE2_ERROR_BADOFFSET }, | |
129 | { "ERROR_BADOPTION", PCRE2_ERROR_BADOPTION }, | |
130 | { "ERROR_BADREPLACEMENT", PCRE2_ERROR_BADREPLACEMENT }, | |
131 | { "ERROR_BADUTFOFFSET", PCRE2_ERROR_BADUTFOFFSET }, | |
132 | { "ERROR_CALLOUT", PCRE2_ERROR_CALLOUT }, | |
133 | { "ERROR_DFA_BADRESTART", PCRE2_ERROR_DFA_BADRESTART }, | |
134 | { "ERROR_DFA_RECURSE", PCRE2_ERROR_DFA_RECURSE }, | |
135 | { "ERROR_DFA_UCOND", PCRE2_ERROR_DFA_UCOND }, | |
136 | { "ERROR_DFA_UFUNC", PCRE2_ERROR_DFA_UFUNC }, | |
137 | { "ERROR_DFA_UITEM", PCRE2_ERROR_DFA_UITEM }, | |
138 | { "ERROR_DFA_WSSIZE", PCRE2_ERROR_DFA_WSSIZE }, | |
139 | { "ERROR_INTERNAL", PCRE2_ERROR_INTERNAL }, | |
140 | { "ERROR_JIT_BADOPTION", PCRE2_ERROR_JIT_BADOPTION }, | |
141 | { "ERROR_JIT_STACKLIMIT", PCRE2_ERROR_JIT_STACKLIMIT }, | |
142 | { "ERROR_MATCHLIMIT", PCRE2_ERROR_MATCHLIMIT }, | |
143 | { "ERROR_NOMEMORY", PCRE2_ERROR_NOMEMORY }, | |
144 | { "ERROR_NOSUBSTRING", PCRE2_ERROR_NOSUBSTRING }, | |
145 | { "ERROR_NOUNIQUESUBSTRING", PCRE2_ERROR_NOUNIQUESUBSTRING }, | |
146 | { "ERROR_NULL", PCRE2_ERROR_NULL }, | |
147 | { "ERROR_RECURSELOOP", PCRE2_ERROR_RECURSELOOP }, | |
148 | { "ERROR_RECURSIONLIMIT", PCRE2_ERROR_RECURSIONLIMIT }, | |
149 | { "ERROR_UNAVAILABLE", PCRE2_ERROR_UNAVAILABLE }, | |
150 | { "ERROR_UNSET", PCRE2_ERROR_UNSET }, | |
151 | { "ERROR_BADOFFSETLIMIT", PCRE2_ERROR_BADOFFSETLIMIT }, | |
152 | { "ERROR_BADREPESCAPE", PCRE2_ERROR_BADREPESCAPE }, | |
153 | { "ERROR_REPMISSINGBRACE", PCRE2_ERROR_REPMISSINGBRACE }, | |
154 | { "ERROR_BADSUBSTITUTION", PCRE2_ERROR_BADSUBSTITUTION }, | |
155 | { "ERROR_BADSUBSPATTERN", PCRE2_ERROR_BADSUBSPATTERN }, | |
156 | { "ERROR_TOOMANYREPLACE", PCRE2_ERROR_TOOMANYREPLACE }, | |
157 | #ifdef PCRE2_ERROR_BADSERIALIZEDDATA | |
158 | { "ERROR_BADSERIALIZEDDATA", PCRE2_ERROR_BADSERIALIZEDDATA }, | |
159 | #endif | |
160 | /*---------------------------------------------------------------------------*/ | |
161 | { NULL, 0 } | |
162 | }; | |
163 | ||
164 | static flag_pair pcre2_config_flags[] = { | |
165 | { "PCRE2_CONFIG_BSR", PCRE2_CONFIG_BSR }, | |
166 | { "PCRE2_CONFIG_JIT", PCRE2_CONFIG_JIT }, | |
167 | { "PCRE2_CONFIG_JITTARGET", PCRE2_CONFIG_JITTARGET }, | |
168 | { "PCRE2_CONFIG_LINKSIZE", PCRE2_CONFIG_LINKSIZE }, | |
169 | { "PCRE2_CONFIG_MATCHLIMIT", PCRE2_CONFIG_MATCHLIMIT }, | |
170 | { "PCRE2_CONFIG_NEWLINE", PCRE2_CONFIG_NEWLINE }, | |
171 | { "PCRE2_CONFIG_PARENSLIMIT", PCRE2_CONFIG_PARENSLIMIT }, | |
172 | { "PCRE2_CONFIG_RECURSIONLIMIT", PCRE2_CONFIG_RECURSIONLIMIT }, | |
173 | { "PCRE2_CONFIG_STACKRECURSE", PCRE2_CONFIG_STACKRECURSE }, | |
174 | { "PCRE2_CONFIG_UNICODE", PCRE2_CONFIG_UNICODE }, | |
175 | { "PCRE2_CONFIG_UNICODE_VERSION", PCRE2_CONFIG_UNICODE_VERSION }, | |
176 | { "PCRE2_CONFIG_VERSION", PCRE2_CONFIG_VERSION }, | |
177 | /*---------------------------------------------------------------------------*/ | |
178 | { NULL, 0 } | |
179 | }; | |
180 | ||
181 | int Lpcre2_config (lua_State *L) { | |
182 | flag_pair *fp; | |
183 | if (lua_istable (L, 1)) | |
184 | lua_settop (L, 1); | |
185 | else | |
186 | lua_newtable (L); | |
187 | for (fp = pcre2_config_flags; fp->key; ++fp) { | |
188 | if (fp->val == PCRE2_CONFIG_JITTARGET) { | |
189 | #if PCRE2_CODE_UNIT_WIDTH == 8 | |
190 | char buf[64]; | |
191 | if (PCRE2_ERROR_BADOPTION != pcre2_config (fp->val, buf)) { | |
192 | lua_pushstring (L, buf); | |
193 | lua_setfield (L, -2, fp->key); | |
194 | } | |
195 | #endif | |
196 | } | |
197 | else { | |
198 | int val; | |
199 | if (0 == pcre2_config (fp->val, &val)) { | |
200 | lua_pushinteger (L, val); | |
201 | lua_setfield (L, -2, fp->key); | |
202 | } | |
203 | } | |
204 | } | |
205 | return 1; | |
206 | } | |
207 | ||
208 | int Lpcre2_get_flags (lua_State *L) { | |
209 | const flag_pair* fps[] = { pcre2_flags, pcre2_error_flags, NULL }; | |
210 | return get_flags (L, fps); | |
211 | } | |
212 |
60 | 60 | lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n)) |
61 | 61 | |
62 | 62 | #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ |
63 | (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
63 | (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
64 | 64 | |
65 | 65 | #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1) |
66 | 66 | #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n)) |
68 | 68 | (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n)) |
69 | 69 | |
70 | 70 | #define ALG_BASE(st) (st) |
71 | #define ALG_GETCFLAGS(L,pos) luaL_optint(L, pos, ALG_CFLAGS_DFLT) | |
71 | #define ALG_GETCFLAGS(L,pos) (int)luaL_optinteger(L, pos, ALG_CFLAGS_DFLT) | |
72 | 72 | |
73 | 73 | typedef struct { |
74 | 74 | regex_t r; |
262 | 262 | { "find", algf_find }, |
263 | 263 | { "gmatch", algf_gmatch }, |
264 | 264 | { "gsub", algf_gsub }, |
265 | { "count", algf_count }, | |
265 | 266 | { "split", algf_split }, |
266 | 267 | { "new", algf_new }, |
267 | 268 | { "flags", Posix_get_flags }, |
37 | 37 | lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n)) |
38 | 38 | |
39 | 39 | #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ |
40 | (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
40 | (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
41 | 41 | |
42 | 42 | #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1) |
43 | 43 | #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n)) |
45 | 45 | (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n)) |
46 | 46 | |
47 | 47 | #define ALG_BASE(st) (st) |
48 | #define ALG_GETCFLAGS(L,pos) luaL_optint(L, pos, ALG_CFLAGS_DFLT) | |
48 | #define ALG_GETCFLAGS(L,pos) (int)luaL_optinteger(L, pos, ALG_CFLAGS_DFLT) | |
49 | 49 | |
50 | 50 | typedef struct { |
51 | 51 | regex_t r; |
84 | 84 | argE->text = luaL_checklstring (L, 2, &argE->textlen); |
85 | 85 | checkarg_regaparams (L, 3, argP); |
86 | 86 | argE->startoffset = get_startoffset (L, 4, argE->textlen); |
87 | argE->eflags = luaL_optint (L, 5, ALG_EFLAGS_DFLT); | |
87 | argE->eflags = (int)luaL_optinteger (L, 5, ALG_EFLAGS_DFLT); | |
88 | 88 | } |
89 | 89 | |
90 | 90 | static int generate_error (lua_State *L, const TPosix *ud, int errcode) { |
330 | 330 | { "find", algf_find }, |
331 | 331 | { "gmatch", algf_gmatch }, |
332 | 332 | { "gsub", algf_gsub }, |
333 | { "count", algf_count }, | |
333 | 334 | { "match", algf_match }, |
334 | 335 | { "split", algf_split }, |
335 | 336 | { "config", Ltre_config }, |
40 | 40 | lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n)) |
41 | 41 | |
42 | 42 | #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ |
43 | (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
43 | (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
44 | 44 | |
45 | 45 | #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, ((offs) + ALG_SUBBEG(ud,n))/ALG_CHARSIZE + 1) |
46 | 46 | #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, ((offs) + ALG_SUBEND(ud,n))/ALG_CHARSIZE) |
48 | 48 | (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n)) |
49 | 49 | |
50 | 50 | #define ALG_BASE(st) (st) |
51 | #define ALG_GETCFLAGS(L,pos) luaL_optint(L, pos, ALG_CFLAGS_DFLT) | |
51 | #define ALG_GETCFLAGS(L,pos) (int)luaL_optinteger(L, pos, ALG_CFLAGS_DFLT) | |
52 | 52 | |
53 | 53 | typedef struct { |
54 | 54 | regex_t r; |
87 | 87 | argE->text = luaL_checklstring (L, 2, &argE->textlen); |
88 | 88 | checkarg_regaparams (L, 3, argP); |
89 | 89 | argE->startoffset = get_startoffset (L, 4, argE->textlen); |
90 | argE->eflags = luaL_optint (L, 5, ALG_EFLAGS_DFLT); | |
90 | argE->eflags = (int)luaL_optinteger (L, 5, ALG_EFLAGS_DFLT); | |
91 | 91 | } |
92 | 92 | |
93 | 93 | static int generate_error (lua_State *L, const TPosix *ud, int errcode) { |
211 | 211 | { "wfind", algf_find }, |
212 | 212 | { "wgmatch", algf_gmatch }, |
213 | 213 | { "wgsub", algf_gsub }, |
214 | { "wcount", algf_count }, | |
214 | 215 | { "wmatch", algf_match }, |
215 | 216 | { "wsplit", algf_split }, |
216 | 217 | { NULL, NULL } |
0 | 0 | To test Lrexlib, execute the following command line: |
1 | 1 | |
2 | lua ./runtest.lua [-v] LIBRARY... | |
2 | lua ./runtest.lua [-a] [-v] LIBRARY... | |
3 | 3 | |
4 | -a use the external "Alien" library for "buffer subject" tests, | |
5 | rather than the internal function | |
4 | 6 | -v gives verbose output |
4 | 4 | |
5 | 5 | local luatest = require "luatest" |
6 | 6 | local N = luatest.NT |
7 | local unpack = unpack or table.unpack | |
7 | 8 | |
8 | 9 | local function norm(a) return a==nil and N or a end |
9 | 10 | |
31 | 32 | --{ subj patt results } |
32 | 33 | { {"ab", lib.new"."}, {{"a",N}, {"b",N} } }, |
33 | 34 | { {("abcd"):rep(3), "(.)b.(d)"}, {{"a","d"},{"a","d"},{"a","d"}} }, |
34 | { {"abcd", ".*" }, {{"abcd",N},{"",N} } },--zero-length match | |
35 | { {"abcd", ".*" }, {{"abcd",N} } },--zero-length match | |
35 | 36 | { {"abc", "^." }, {{"a",N}} },--anchored pattern |
37 | } | |
38 | end | |
39 | ||
-- Test set for lib.count. Every case has the form
--   { {subject, pattern}, {expected results} }
local function set_f_count (lib, flg)
  local set = {
    { {"ab",            lib.new"."},  { 2 } },
    { {("abcd"):rep(3), "(.)b.(d)"},  { 3 } },
    { {"abcd",          ".*" },       { 1 } },
    { {"abc",           "^." },       { 1 } },
  }
  set.Name = "Function count"
  set.Func = lib.count
  return set
end
38 | 51 | |
215 | 228 | --{ s, p, f, n, res1, res2, res3 }, |
216 | 229 | { {"a2c3", ".", "#" }, {"####", 4, 4} }, -- test . |
217 | 230 | { {"a2c3", ".+", "#" }, {"#", 1, 1} }, -- test .+ |
218 | { {"a2c3", ".*", "#" }, {"##", 2, 2} }, -- test .* | |
231 | { {"a2c3", ".*", "#" }, {"#", 1, 1} }, -- test .* | |
219 | 232 | { {"/* */ */", "\\/\\*(.*)\\*\\/", "#" }, {"#", 1, 1} }, |
220 | 233 | { {"a2c3", "[0-9]", "#" }, {"a#c#", 2, 2} }, -- test %d |
221 | 234 | { {"a2c3", "[^0-9]", "#" }, {"#2#3", 2, 2} }, -- test %D |
307 | 320 | set_m_tfind (lib), |
308 | 321 | set_m_find (lib), |
309 | 322 | set_m_match (lib), |
323 | set_f_count (lib), | |
310 | 324 | set_f_gsub1 (lib), |
311 | 325 | set_f_gsub2 (lib), |
312 | 326 | set_f_gsub3 (lib), |
1 | 1 | |
2 | 2 | local luatest = require "luatest" |
3 | 3 | local N = luatest.NT |
4 | local unpack = unpack or table.unpack | |
4 | 5 | |
5 | 6 | local function norm(a) return a==nil and N or a end |
6 | 7 |
1 | 1 | |
2 | 2 | local luatest = require "luatest" |
3 | 3 | local N = luatest.NT |
4 | local unpack = unpack or table.unpack | |
4 | 5 | |
5 | 6 | local function norm(a) return a==nil and N or a end |
6 | 7 |
66 | 66 | -- returns: |
67 | 67 | -- 1) true, if success; false, if failure |
68 | 68 | -- 2) test results table or error_message |
69 | local function test_function (test, func) | |
69 | local function test_function (test, func, newmembuffer) | |
70 | 70 | local res |
71 | 71 | local t = packNT (pcall (func, unpackNT (test[1]))) |
72 | 72 | if t[1] then |
73 | 73 | table.remove (t, 1) |
74 | 74 | res = t |
75 | if alien then | |
76 | local subject = test[1][1] | |
77 | local buf = alien.buffer (#subject) | |
78 | if #subject > 0 then | |
79 | alien.memmove (buf:topointer (), subject, #subject) | |
80 | end | |
81 | test[1][1] = buf | |
75 | if newmembuffer then | |
76 | test[1][1] = newmembuffer (test[1][1]) | |
82 | 77 | local t = packNT (pcall (func, unpackNT (test[1]))) |
83 | 78 | if t[1] then |
84 | 79 | table.remove (t, 1) |
85 | 80 | res = t |
86 | 81 | else |
87 | print "alien test failed" | |
82 | print "buffer subjects test failed" | |
88 | 83 | res = t[2] --> error_message |
89 | 84 | end |
90 | 85 | end |
119 | 114 | end |
120 | 115 | |
121 | 116 | -- returns: a list of failed tests |
122 | local function test_set (set, lib) | |
117 | local function test_set (set, lib, newmembuffer) | |
123 | 118 | local list = {} |
124 | 119 | |
125 | 120 | if type (set.Func) == "function" then |
126 | 121 | local func = set.Func |
127 | 122 | for i,test in ipairs (set) do |
128 | local ok, res = test_function (test, func) | |
123 | local ok, res = test_function (test, func, newmembuffer) | |
129 | 124 | if not ok then |
130 | 125 | table.insert (list, {i=i, res}) |
131 | 126 | end |
1 | 1 | |
2 | 2 | local luatest = require "luatest" |
3 | 3 | local N = luatest.NT |
4 | local unpack = unpack or table.unpack | |
4 | 5 | |
5 | 6 | local function norm(a) return a==nil and N or a end |
6 | 7 | |
56 | 57 | |
57 | 58 | local function set_f_gmatch (lib, flg) |
58 | 59 | -- gmatch (s, p, [cf], [ef]) |
59 | local pCSV = "(^[^,]*)|,([^,]*)" | |
60 | local pCSV = "[^,]*" | |
60 | 61 | local F = false |
61 | 62 | local function test_gmatch (subj, patt) |
62 | 63 | local out, guard = {}, 10 |
70 | 71 | return { |
71 | 72 | Name = "Function gmatch", |
72 | 73 | Func = test_gmatch, |
73 | --{ subj patt results } | |
74 | { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj | |
75 | { {"", pCSV}, {{"",F}} }, | |
76 | { {"12", pCSV}, {{"12",F}} }, | |
77 | ----{ {",", pCSV}, {{"", F},{F,""}} }, | |
78 | { {"12,,45", pCSV}, {{"12",F},{F,""},{F,"45"}} }, | |
79 | ----{ {",,12,45,,ab,", pCSV}, {{"",F},{F,""},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} }, | |
74 | --{ subj patt results } | |
75 | { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj | |
76 | { {"", pCSV}, {{"",N}} }, | |
77 | { {"12", pCSV}, {{"12",N}} }, | |
78 | { {",", pCSV}, {{"", N},{"", N}} }, | |
79 | { {"12,,45", pCSV}, {{"12",N},{"",N},{"45",N}} }, | |
80 | { {",,12,45,,ab,", pCSV}, {{"",N},{"",N},{"12",N},{"45",N},{"",N},{"ab",N},{"",N}} }, | |
81 | { {"12345", "(.)(.)"}, {{"1","2"},{"3","4"}} }, | |
82 | { {"12345", "(.)(.?)"}, {{"1","2"},{"3","4"},{"5",""}} }, | |
80 | 83 | } |
81 | 84 | end |
82 | 85 | |
96 | 99 | Func = test_split, |
97 | 100 | --{ subj patt results } |
98 | 101 | { {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj |
99 | { {"ab", "$"}, {{"ab","",N}, {"",N,N}, } }, | |
100 | { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } }, | |
101 | { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N},{"",N,N}, } }, | |
102 | { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } }, | |
102 | { {"ab", "$"}, {{"ab","",N}, {"",N,N} } }, | |
103 | { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N} } }, | |
104 | { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N}, {"",N,N} } }, | |
105 | { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N} } }, | |
106 | { {"ab", ".*" }, {{"","ab",N}, {"",N,N} } }, | |
107 | { {"ab", ".*?" }, {{"","",N}, {"a","",N}, {"b","",N}, {"",N,N} } }, | |
108 | { {"ab;de", ";*" }, {{"","",N},{"a","",N},{"b",";",N},{"d","",N},{"e","",N},{"",N,N} }}, | |
109 | } | |
110 | end | |
111 | ||
-- Test set with a single case: lib.internal_test is called with an empty
-- string and is expected to return true.
local function set_f_internal_test (lib, flg)
  local set = { Name = "Function internal_test", Func = lib.internal_test }
  --         { params  results }
  set[1] = { {""},   {true} }
  return set
end
105 | 120 | |
127 | 142 | } |
128 | 143 | end |
129 | 144 | |
-- Test set for the "capturecount" method. Every case has the form
--   { {patt,cf,lo}, {subj,st,ef}, { results } }
-- with only the pattern supplied and an empty method-argument list.
local function set_m_capturecount (lib, flg)
  local set = { Name = "Method capturecount", Method = "capturecount" }
  -- pattern, expected result
  local expected = {
    { "a",            0 },
    { "(a)",          1 },
    { "(a)(a)",       2 },
    { "((a)a)",       2 },
    { "((?i)a)(?:a)", 1 },
  }
  for i, pair in ipairs (expected) do
    set[i] = { {pair[1]}, {}, { pair[2] } }
  end
  return set
end
157 | ||
130 | 158 | return function (libname) |
131 | 159 | local lib = require (libname) |
132 | 160 | local flags = lib.flags () |
133 | 161 | local sets = { |
162 | set_f_internal_test (lib, flags), | |
134 | 163 | set_f_match (lib, flags), |
135 | 164 | set_f_find (lib, flags), |
136 | 165 | set_f_gmatch (lib, flags), |
137 | 166 | set_f_split (lib, flags), |
138 | 167 | set_m_exec (lib, flags), |
139 | 168 | set_m_tfind (lib, flags), |
169 | set_m_capturecount (lib, flags), | |
140 | 170 | } |
141 | 171 | local MAJOR = tonumber(lib.version():match("%d+")) |
142 | 172 | if MAJOR >= 0 then |
1 | 1 | |
2 | 2 | local luatest = require "luatest" |
3 | 3 | local N = luatest.NT |
4 | local unpack = unpack or table.unpack | |
4 | 5 | |
5 | 6 | local function norm(a) return a==nil and N or a end |
6 | 7 | |
26 | 27 | |
27 | 28 | local function set_f_find (lib, flg) |
28 | 29 | local cp1251 = |
29 | "������¨�������������������������‗אבגדהו¸זחטיךכלםמןנסעףפץצקרש��ת�" | |
30 | "������¨�������������������������‗אבגדהו¸זחטיךכלםמןנסעףפץצקרשת���" | |
30 | 31 | local loc = "Russian_Russia.1251" |
31 | 32 | return { |
32 | 33 | Name = "Function find", |
59 | 60 | |
60 | 61 | local function set_f_gmatch (lib, flg) |
61 | 62 | -- gmatch (s, p, [cf], [ef]) |
62 | local pCSV = "(^[^,]*)|,([^,]*)" | |
63 | local pCSV = "[^,]*" | |
63 | 64 | local F = false |
64 | 65 | local function test_gmatch (subj, patt) |
65 | 66 | local out, guard = {}, 10 |
73 | 74 | return { |
74 | 75 | Name = "Function gmatch", |
75 | 76 | Func = test_gmatch, |
76 | --{ subj patt results } | |
77 | { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj | |
78 | { {"", pCSV}, {{"",F}} }, | |
79 | { {"12", pCSV}, {{"12",F}} }, | |
80 | { {",", pCSV}, {{"", F},{F,""}} }, | |
81 | { {"12,,45", pCSV}, {{"12",F},{F,""},{F,"45"}} }, | |
82 | { {",,12,45,,ab,", pCSV}, {{"",F},{F,""},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} }, | |
77 | --{ subj patt results } | |
78 | { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj | |
79 | { {"", pCSV}, {{"",N}} }, | |
80 | { {"12", pCSV}, {{"12",N}} }, | |
81 | { {",", pCSV}, {{"", N},{"", N}} }, | |
82 | { {"12,,45", pCSV}, {{"12",N},{"",N},{"45",N}} }, | |
83 | { {",,12,45,,ab,", pCSV}, {{"",N},{"",N},{"12",N},{"45",N},{"",N},{"ab",N},{"",N}} }, | |
84 | { {"12345", "(.)(.)"}, {{"1","2"},{"3","4"}} }, | |
85 | { {"12345", "(.)(.?)"}, {{"1","2"},{"3","4"},{"5",""}} }, | |
83 | 86 | } |
84 | 87 | end |
85 | 88 | |
99 | 102 | Func = test_split, |
100 | 103 | --{ subj patt results } |
101 | 104 | { {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj |
102 | { {"ab", "$"}, {{"ab","",N}, {"",N,N}, } }, | |
103 | { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } }, | |
104 | { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N},{"",N,N}, } }, | |
105 | { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } }, | |
105 | { {"ab", "$"}, {{"ab","",N}, {"",N,N} } }, | |
106 | { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N} } }, | |
107 | { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N}, {"",N,N} } }, | |
108 | { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N} } }, | |
109 | { {"ab", ".*" }, {{"","ab",N}, {"",N,N} } }, | |
110 | { {"ab", ".*?" }, {{"","",N}, {"a","",N}, {"b","",N}, {"",N,N} } }, | |
111 | { {"ab;de", ";*" }, {{"","",N},{"a","",N},{"b",";",N},{"d","",N},{"e","",N},{"",N,N} }}, | |
106 | 112 | } |
107 | 113 | end |
108 | 114 | |
135 | 141 | end |
136 | 142 | |
137 | 143 | local function set_m_dfa_exec (lib, flg) |
144 | local ver = tonumber(lib.version():match("%d+%.%d+")) | |
145 | local NAP = ver < 8.34 and "" or "(*NO_AUTO_POSSESS)" | |
146 | local flag_partial = ver < 10.0 and flg.PARTIAL or flg.PARTIAL_SOFT | |
138 | 147 | return { |
139 | 148 | Name = "Method dfa_exec", |
140 | 149 | Method = "dfa_exec", |
141 | 150 | --{patt,cf,lo}, {subj,st,ef,os,ws} { results } |
142 | { {".+"}, {"abcd"}, {1,{4,3,2,1},4} }, -- [none] | |
143 | { {".+"}, {"abcd",2}, {2,{4,3,2}, 3} }, -- positive st | |
144 | { {".+"}, {"abcd",-2}, {3,{4,3}, 2} }, -- negative st | |
151 | { {NAP..".+"}, {"abcd"}, {1,{4,3,2,1},4} }, -- [none] | |
152 | { {NAP..".+"}, {"abcd",2}, {2,{4,3,2}, 3} }, -- positive st | |
153 | { {NAP..".+"}, {"abcd",-2}, {3,{4,3}, 2} }, -- negative st | |
145 | 154 | { {".+"}, {"abcd",5}, {N } }, -- failing st |
146 | { {".*"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- [none] | |
155 | { {NAP..".*"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- [none] | |
147 | 156 | { {".*?"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- non-greedy |
148 | 157 | { {"aBC",flg.CASELESS}, {"abc"}, {1,{3},1} }, -- cf |
149 | 158 | { {"aBC","i" }, {"abc"}, {1,{3},1} }, -- cf |
152 | 161 | { {"bc"}, {"abc",N, flg.ANCHORED}, {N } }, -- ef |
153 | 162 | { { "(.)b.(d)"}, {"abcd"}, {1,{4},1} }, --[captures] |
154 | 163 | { {"abc"}, {"ab"}, {N } }, |
155 | { {"abc"}, {"ab",N,flg.PARTIAL}, {1,{2},flg.ERROR_PARTIAL} }, | |
156 | { {".+"}, {string.rep("a",50),N,N,50,50}, {1, fill(50,26), 0}},-- small ovecsize | |
164 | { {"abc"}, {"ab",N,flag_partial}, {1,{2},flg.ERROR_PARTIAL} }, | |
165 | { {NAP..".+"}, {string.rep("a",50),N,N,50,50}, {1, fill(50,26), 0}},-- small ovecsize | |
157 | 166 | } |
167 | end | |
168 | ||
-- Sanity check rather than a test set: compiles "(foo)(bar)", asks the
-- compiled pattern for its fullinfo table and asserts that CAPTURECOUNT is 2.
-- Nothing is returned.
local function set_m_fullinfo (lib, flg)
  local capturecount = lib.new("(foo)(bar)"):fullinfo().CAPTURECOUNT
  assert(capturecount == 2)
end
159 | 174 | |
160 | 175 | return function (libname) |
167 | 182 | set_f_split (lib, flags), |
168 | 183 | set_m_exec (lib, flags), |
169 | 184 | set_m_tfind (lib, flags), |
185 | set_m_fullinfo (lib, flags), | |
170 | 186 | } |
171 | 187 | if flags.MAJOR >= 4 then |
172 | 188 | table.insert (sets, set_named_subpatterns (lib, flags)) |
0 | 0 | -- See Copyright Notice in the file LICENSE |
1 | 1 | |
2 | 2 | local pat2pcre = require "pat2pcre" |
3 | local unpack = unpack or table.unpack | |
3 | 4 | |
4 | 5 | local function get_gsub (lib) |
5 | 6 | return lib.gsub or |
26 | 27 | Func = get_gsub (lib), |
27 | 28 | --{ s, p, f, n, res1, res2, res3 }, |
28 | 29 | { {"/* */ */", "%/%*(.*)%*%/", "#" }, {"#", 1, 1} }, |
29 | { {"a2c3", ".-", "#" }, {"#########", 9, 9} }, -- test .- | |
30 | { {"a2c3", ".-", "#" }, {"#a#2#c#3#", 5, 5} }, -- test .- | |
30 | 31 | { {"/**/", "%/%*(.-)%*%/", "#" }, {"#", 1, 1} }, |
31 | 32 | { {"/* */ */", "%/%*(.-)%*%/", "#" }, {"# */", 1, 1} }, |
32 | 33 | { {"a2c3", "%d", "#" }, {"a#c#", 2, 2} }, -- test %d |
36 | 37 | { {"abcd", "\\b", "%1"}, {"abcd", 2, 2} }, |
37 | 38 | { {"", pCSV,fCSV}, {"[]", 1, 1} }, |
38 | 39 | { {"123", pCSV,fCSV}, {"[123]", 1, 1} }, |
39 | { {",", pCSV,fCSV}, {"[][]", 2, 2} }, | |
40 | { {",", pCSV,fCSV}, {"[],", 1, 1} }, | |
40 | 41 | { {"123,,456", pCSV,fCSV}, {"[123][][456]", 3, 3}}, |
41 | { {",,123,456,,abc,789,", pCSV,fCSV}, {"[][][123][456][][abc][789][]", 8, 8}}, | |
42 | { {",,123,456,,abc,789,", pCSV,fCSV}, {"[],[123][456][][abc][789][]", 7, 7}}, | |
42 | 43 | } |
43 | 44 | -- convert patterns: lua -> pcre |
44 | 45 | for _, test in ipairs (set) do |
0 | 0 | -- See Copyright Notice in the file LICENSE |
1 | ||
2 | -- See if we have alien, so we can do tests with buffer subjects | |
3 | local ok | |
4 | ok, alien = pcall (require, "alien") | |
5 | if not ok then | |
6 | io.stderr:write ("Warning: alien not found, so cannot run tests with buffer subjects\n") | |
7 | alien = nil | |
8 | end | |
9 | 1 | |
10 | 2 | do |
11 | 3 | local path = "./?.lua;" |
15 | 7 | end |
16 | 8 | local luatest = require "luatest" |
17 | 9 | |
-- Returns an alien buffer of #str bytes; for a non-empty string the contents
-- are copied into it with alien.memmove. "alien" is required on each call.
local function newalienbuffer (str)
  local alien = require "alien"
  local len = #str
  local buf = alien.buffer (len)
  if len > 0 then
    alien.memmove (buf:topointer (), str, len)
  end
  return buf
end
18 | ||
18 | 19 | -- returns: number of failures |
19 | local function test_library (libname, setfile, verbose) | |
20 | local function test_library (libname, setfile, verbose, use_alien) | |
20 | 21 | if verbose then |
21 | 22 | print (("[lib: %s; file: %s]"):format (libname, setfile)) |
22 | 23 | end |
24 | 25 | local f = require (setfile) |
25 | 26 | local sets = f (libname) |
26 | 27 | |
27 | local realalien = alien | |
28 | if libname == "rex_posix" and not lib.flags ().STARTEND and alien then | |
29 | alien = nil | |
30 | io.stderr:write ("Cannot run posix tests with alien without REG_STARTEND\n") | |
28 | local newmembuffer = use_alien and newalienbuffer or lib._newmembuffer | |
29 | if newmembuffer then | |
30 | if libname == "rex_posix" and not lib.flags ().STARTEND then | |
31 | newmembuffer = nil | |
32 | io.stderr:write ("Cannot run posix tests with buffer subjects without REG_STARTEND\n") | |
33 | end | |
34 | else | |
35 | io.stderr:write ("Warning: cannot run tests with buffer subjects\n") | |
31 | 36 | end |
32 | 37 | |
33 | 38 | local n = 0 -- number of failures |
35 | 40 | if verbose then |
36 | 41 | print (set.Name or "Unnamed set") |
37 | 42 | end |
38 | local err = luatest.test_set (set, lib) | |
43 | local err = luatest.test_set (set, lib, newmembuffer) | |
39 | 44 | if verbose then |
40 | 45 | for _,v in ipairs (err) do |
41 | 46 | print (" Test " .. v.i) |
47 | 52 | if verbose then |
48 | 53 | print "" |
49 | 54 | end |
50 | alien = realalien | |
51 | 55 | return n |
52 | 56 | end |
53 | 57 | |
56 | 60 | gnu = { lib = "rex_gnu", "common_sets", "emacs_sets", "gnu_sets" }, |
57 | 61 | oniguruma = { lib = "rex_onig", "common_sets", "oniguruma_sets", }, |
58 | 62 | pcre = { lib = "rex_pcre", "common_sets", "pcre_sets", "pcre_sets2", }, |
63 | pcre2 = { lib = "rex_pcre2", "common_sets", "pcre_sets", "pcre_sets2", }, | |
59 | 64 | spencer = { lib = "rex_spencer", "common_sets", "posix_sets", "spencer_sets" }, |
60 | 65 | tre = { lib = "rex_tre", "common_sets", "posix_sets", "spencer_sets", --[["tre_sets"]] }, |
61 | 66 | } |
63 | 68 | do |
64 | 69 | local verbose, tests, nerr = false, {}, 0 |
65 | 70 | local dir |
71 | local use_alien | |
66 | 72 | -- check arguments |
67 | 73 | for i = 1, select ("#", ...) do |
68 | 74 | local arg = select (i, ...) |
69 | 75 | if arg:sub(1,1) == "-" then |
70 | 76 | if arg == "-v" then |
71 | 77 | verbose = true |
78 | elseif arg == "-a" then | |
79 | use_alien = true | |
72 | 80 | elseif arg:sub(1,2) == "-d" then |
73 | 81 | dir = arg:sub(3) |
82 | else | |
83 | error ("invalid argument: [" .. arg .. "]") | |
74 | 84 | end |
75 | 85 | else |
76 | 86 | if avail_tests[arg] then |
98 | 108 | for _, test in ipairs (tests) do |
99 | 109 | package.loaded[test.lib] = nil -- to force-reload the tested library |
100 | 110 | for _, setfile in ipairs (test) do |
101 | nerr = nerr + test_library (test.lib, setfile, verbose) | |
111 | nerr = nerr + test_library (test.lib, setfile, verbose, use_alien) | |
102 | 112 | end |
103 | 113 | end |
104 | 114 | print ("Total number of failures: " .. nerr) |
1 | 1 | |
2 | 2 | local luatest = require "luatest" |
3 | 3 | local N = luatest.NT |
4 | local unpack = unpack or table.unpack | |
4 | 5 | |
5 | 6 | local function norm(a) return a==nil and N or a end |
6 | 7 |
4 | 4 | |
5 | 5 | local luatest = require "luatest" |
6 | 6 | local N = luatest.NT |
7 | local unpack = unpack or table.unpack | |
7 | 8 | |
8 | 9 | local L = function(s) return (string.gsub(s, ".", "%0\0")) end |
9 | 10 |
# Makefile for lrexlib
#
# Runs the requested target in every per-library makefile listed in MKFILES.

# rex_gnu and rex_spencer are only part of the non-64-bit list.
ifeq ($(DIRBIT),64)
  MKFILES = \
    rex_onig.mak \
    rex_pcre.mak \
    rex_pcre2.mak \
    rex_tre.mak
else
  MKFILES = \
    rex_gnu.mak \
    rex_onig.mak \
    rex_pcre.mak \
    rex_pcre2.mak \
    rex_spencer.mak \
    rex_tre.mak
endif

# cmd-style "for" loop over MKFILES; the rules below append the target name.
# NOTE(review): the loop does not appear to stop at the first failing
# sub-make - confirm that this best-effort behaviour is intended.
LOOP = @for %%d in ($(MKFILES)) do $(MAKE) -f %%d

all: build test

build:
	$(LOOP)

# Testing needs the built libraries. Without this prerequisite "make -j all"
# is free to start the tests while the build is still running.
test: build
	$(LOOP) test

install:
	$(LOOP) install

clean:
	del *.o *.def *.dll

.PHONY: all build test install clean
# Use with GNU Make.
#
# Shared settings and rules for the rex_*.mak project files. A project file
# sets PROJECT, OBJ, PROJDIR and TESTNAME (optionally MYINCS, MYLIBS and
# MYCFLAGS) and then includes this file.

# Remove a target whose recipe failed. The .def file is written by two
# separate commands, so a failure in between must not leave a partial file
# that looks up to date.
.DELETE_ON_ERROR:

# Lrexlib version
VERSION = 2.9.1

# User Settings ------------------------------------------------------------

# Target Lua version (51 for Lua 5.1, etc.)
LUAVERSION = 51
# Dotted form of LUAVERSION (51 -> 5.1). Only the leading 5 gets a dot, so a
# minor version that itself contains a 5 (55 -> 5.5) is converted correctly.
LUADOTVERSION = $(patsubst 5%,5.%,$(LUAVERSION))

# Target bitness: 32 or 64
DIRBIT = 32
# GCC location (GCC32 and GCC64 are defined environment variables)
PATH = $(GCC$(DIRBIT))

# INSTALLPATH : Path to install the built DLL.
# LUADLL      : Lua DLL to link to (.dll should be omitted).
# LUAEXE      : Lua interpreter.
# LUAINC      : Path of Lua include files.
# LIBPATH     : Path of lua51.dll, lua52.dll, pcre.dll, etc.

INSTALLPATH = S:\Progr\Exe\lib$(DIRBIT)\lua\$(LUADOTVERSION)
LUADLL = lua$(LUAVERSION)
LUAINC = $(PATH_SYSTEM)\include\lua\$(LUADOTVERSION)
LIBPATH = $(CROOT)\Programs\EXE$(DIRBIT)

# Lua 5.1 uses lua.exe and additionally defines REX_CREATEGLOBALVAR; other
# versions use lua<LUAVERSION>.exe.
ifeq ($(LUAVERSION),51)
  LUAEXE = $(LIBPATH)\lua.exe
  CREATEGLOBAL = -DREX_CREATEGLOBALVAR
else
  LUAEXE = $(LIBPATH)\lua$(LUAVERSION).exe
endif
# --------------------------------------------------------------------------

BIN = $(PROJECT).dll
BININSTALL = $(INSTALLPATH)\$(BIN)
CC = gcc
AR = ar rcu
RANLIB = ranlib
# INCS is defined further down; CFLAGS is a recursive variable, so it is
# expanded only when a recipe uses it.
CFLAGS = -W -Wall -O2 $(INCS) -DREX_OPENLIB=luaopen_$(PROJECT) \
  -DREX_LIBNAME=\"$(PROJECT)\" -DVERSION=\"$(VERSION)\" \
  -m$(DIRBIT) $(CREATEGLOBAL) $(MYCFLAGS)
DEFFILE = $(PROJECT).def
EXPORTED = luaopen_$(PROJECT)
INCS = -I$(LUAINC) $(MYINCS)
LIBS = -l$(LUADLL) -m$(DIRBIT) -s $(MYLIBS)
SRCPATH = ..\..\src
TESTPATH = ..\..\test

.PHONY: all install test vtest clean

# Sources and headers are looked up in the common source directory and in the
# project's own subdirectory.
vpath %.c $(SRCPATH);$(SRCPATH)\$(PROJDIR)
vpath %.h $(SRCPATH);$(SRCPATH)\$(PROJDIR)

all: $(BIN)

clean:
	del $(OBJ) $(BIN) $(DEFFILE)

install: $(BININSTALL)

# runtest.lua is pointed at this directory (-d$(CURDIR)), presumably to load
# $(BIN) from it, so the library is built before the tests start.
test: $(BIN)
	cd $(TESTPATH) && $(LUAEXE) runtest.lua $(TESTNAME) -d$(CURDIR)

# Same as "test", with verbose output (-v).
vtest: $(BIN)
	cd $(TESTPATH) && $(LUAEXE) runtest.lua -v $(TESTNAME) -d$(CURDIR)

$(BIN): $(OBJ) $(DEFFILE)
	$(CC) $(DEFFILE) $(OBJ) -L$(LIBPATH) $(LIBS) -o $@ -shared

lib$(PROJECT)$(LUAVERSION).a: $(OBJ)
	$(AR) $@ $?
	$(RANLIB) $@

# Module-definition file: an EXPORTS header followed by one line per symbol
# listed in EXPORTED.
$(DEFFILE):
	echo EXPORTS > $@
	for %%d in ($(EXPORTED)) do echo %%d>> $@

$(BININSTALL): $(BIN)
	copy /Y $< $@
# Documentation Makefile
#
# Forwards "all" and "clean" to the makefile in ..\..\doc, passing APP, CP, RM
# and IDX on the sub-make command line.

APP := rst2html.py
# The quotes keep "copy /y" together as one CP=... argument.
CP := "copy /y"
RM := del
IDX := ..\README.rst

ALLVAR := APP=$(APP) CP=$(CP) RM=$(RM) IDX=$(IDX)

.PHONY: all clean

all clean:
	cd ..\..\doc && $(MAKE) $(ALLVAR) $@
# Project: rex_gnu

# User Settings ------------------------------------------------------------
# path of GNU include files
REGEXINC := $(PATH_WORK)\system\include\gnuregex
# --------------------------------------------------------------------------

PROJECT := rex_gnu
PROJDIR := gnu
TESTNAME := gnu
OBJ := lgnu.o common.o
MYINCS := -I$(REGEXINC)
MYLIBS := -lregex2

# Shared rules. Included ahead of the header dependencies below so that its
# "all" target remains the default goal.
include _mingw.mak

# Header dependencies
lgnu.o common.o : common.h
lgnu.o : algo.h
# Project: rex_onig

# User Settings ------------------------------------------------------------
# path of Oniguruma include files
REGEXINC := $(PATH_WORK)\system\include\oniguruma
# --------------------------------------------------------------------------

PROJECT := rex_onig
PROJDIR := oniguruma
TESTNAME := oniguruma
OBJ := lonig.o lonig_f.o common.o
MYINCS := -I$(REGEXINC)
MYLIBS := -lonig -Wl,--enable-auto-import

# Shared rules. Included ahead of the header dependencies below so that its
# "all" target remains the default goal.
include _mingw.mak

# Header dependencies
lonig.o lonig_f.o common.o : common.h
lonig.o : algo.h
# Project: rex_pcre

# User Settings ------------------------------------------------------------
# path of PCRE include files
REGEXINC := $(PATH_WORK)\system\include\pcre
# --------------------------------------------------------------------------

PROJECT := rex_pcre
PROJDIR := pcre
TESTNAME := pcre
OBJ := lpcre.o lpcre_f.o common.o
MYINCS := -I$(REGEXINC)
MYLIBS := -lpcre

# Shared rules. Included ahead of the header dependencies below so that its
# "all" target remains the default goal.
include _mingw.mak

# Header dependencies
lpcre.o lpcre_f.o common.o : common.h
lpcre.o : algo.h
# Project: rex_pcre2

# User Settings ------------------------------------------------------------
# path of PCRE2 include files
REGEXINC := $(PATH_WORK)\system\include\pcre2
# --------------------------------------------------------------------------

PROJECT := rex_pcre2
PROJDIR := pcre2
TESTNAME := pcre2
OBJ := lpcre2.o lpcre2_f.o common.o
MYINCS := -I$(REGEXINC)
# The binding is compiled for 8-bit code units.
MYCFLAGS := -DPCRE2_CODE_UNIT_WIDTH=8
# NOTE(review): stock PCRE2 builds usually name the 8-bit library pcre2-8;
# confirm that "pcre2" matches the library installed in LIBPATH.
MYLIBS := -lpcre2

# Shared rules. Included ahead of the header dependencies below so that its
# "all" target remains the default goal.
include _mingw.mak

# Header dependencies
lpcre2.o lpcre2_f.o common.o : common.h
lpcre2.o : algo.h
# Project: rex_spencer

# User Settings ------------------------------------------------------------
# path of Spencer's include files
REGEXINC := $(PATH_WORK)\system\include\rxspencer
# --------------------------------------------------------------------------

PROJECT := rex_spencer
# The sources are the POSIX binding (src\posix); only the library differs.
PROJDIR := posix
TESTNAME := spencer
OBJ := lposix.o common.o
MYINCS := -I$(REGEXINC)
MYLIBS := -lrxspencer

# Shared rules. Included ahead of the header dependencies below so that its
# "all" target remains the default goal.
include _mingw.mak

# Header dependencies
lposix.o common.o : common.h
lposix.o : algo.h
# Project: rex_tre

# User Settings ------------------------------------------------------------
# path of TRE include files
REGEXINC := $(PATH_WORK)\system\include
# --------------------------------------------------------------------------

PROJECT := rex_tre
PROJDIR := tre
TESTNAME := tre
OBJ := ltre.o common.o
MYINCS := -I$(REGEXINC)
MYLIBS := -ltre

# Uncomment the following line to add wide-character functions (in alpha state).
# ADDWIDECHARFUNCS = 1
ifdef ADDWIDECHARFUNCS
  OBJ += ltre_w.o
  MYCFLAGS += -DREX_ADDWIDECHARFUNCS
endif

# Shared rules. Included ahead of the header dependencies below so that its
# "all" target remains the default goal.
include _mingw.mak

# Header dependencies
ltre.o ltre_w.o : common.h algo.h
common.o : common.h