New upstream version 2.9.1
Sergei Golovan
2 years ago
0 | *.[oa] | |
1 | *~ | |
2 | *.so.* | |
3 | *.so | |
4 | *.src.rock | |
5 | ChangeLog | |
6 | *.zip | |
7 | release-notes | |
8 | /*.rockspec | |
9 | /luarocks |
0 | 2010-07-06 Shmuel Zeigerman <shmuz@013net.net> | |
1 | ||
2 | * lonig.c: casts to suppress compiler warnings. | |
3 | ||
4 | 2010-07-05 Shmuel Zeigerman <shmuz@013net.net> | |
5 | ||
6 | * lpcre_f.c: added flags up to PCRE version 8.10. | |
7 | ||
8 | 2010-01-19 Shmuel Zeigerman <shmuz@013net.net> | |
9 | ||
10 | * several files: luaL_typerror renamed to luaL_typeerror (as in Lua 5.2). | |
11 | * common.h: added a macro for handling luaL_typeerror and luaL_typerror. | |
12 | ||
13 | 2009-11-29 Shmuel Zeigerman <shmuz@013net.net> | |
14 | ||
15 | * ltre.c: changes to adapt to TRE 0.8 | |
16 | 1) #include <tre/tre.h> (was: <tre/regex.h>). | |
17 | 2) added tre_ prefix to all TRE functions. | |
18 | ||
19 | 2009-11-08 Shmuel Zeigerman <shmuz@013net.net> | |
20 | ||
21 | * lpcre_f.c: added flags up to PCRE version 8.00. | |
22 | ||
23 | 2008-08-04 Shmuel Zeigerman <shmuz@013net.net> | |
24 | ||
25 | * onig.c: making 'locale' and 'syntax' case sensitive again. | |
26 | * onig_f.c: enclose all flags definitions into #ifdef's. | |
27 | ||
28 | 2008-07-30 Reuben Thomas <rrt@sc3d.org> | |
29 | ||
30 | * Fixes in make files and docs. Testing on Linux. | |
31 | ||
32 | 2008-07-30 Shmuel Zeigerman <shmuz@013net.net> | |
33 | ||
34 | * Place each binding in separate directory: | |
35 | src/posix, src/pcre, src/tre, src/oniguruma. | |
36 | ||
37 | 2008-07-27 Shmuel Zeigerman <shmuz@013net.net> | |
38 | ||
39 | * Initial binding of Oniguruma library. | |
40 | ||
41 | 2008-06-22 Shmuel Zeigerman <shmuz@013net.net> | |
42 | ||
43 | * test/luatest.lua: a fix; did not work with 'strict'. | |
44 | ||
45 | 2008-06-14 Shmuel Zeigerman <shmuz@013net.net> | |
46 | ||
47 | * algo.h: [API extension; suggested by Reuben Thomas] | |
48 | all functions receiving string-type regex accept a compiled regex too. | |
49 | If this is the case, cf and lo arguments are ignored (should be | |
50 | either supplied as nils or omitted). | |
51 | * algo.h: update version string to "Lrexlib 2.4.0". | |
52 | ||
53 | 2008-05-09 Shmuel Zeigerman <shmuz@013net.net> | |
54 | ||
55 | * lpcre_f.c: added new flag from PCRE-7.7. | |
56 | ||
57 | 2008-03-28 Shmuel Zeigerman <shmuz@013net.net> | |
58 | * algo.h, lpcre.h, lposix.h, ltre.h: add two new methods (find and match). | |
59 | * algo.h: rename functions: | |
60 | checkarg_tfind --> checkarg_find_method; | |
61 | generic_tfind --> generic_find_method; | |
62 | checkarg_find_f --> checkarg_find_func; | |
63 | generic_find --> generic_find_func; | |
64 | ||
65 | 2008-03-25 Shmuel Zeigerman <shmuz@013net.net> | |
66 | * algo.h (plainfind_func): optimize for speed. | |
67 | * algo.h (plainfind_func): treat empty patterns as valid. | |
68 | * test/common_sets.lua (set_f_plainfind): add tests with empty patterns. | |
69 | ||
70 | 2008-01-12 Shmuel Zeigerman <shmuz@013net.net> | |
71 | * test/luatest.lua: refactoring. | |
72 | ||
73 | 2007-12-27 Shmuel Zeigerman <shmuz@013net.net> | |
74 | * test/runtest.lua: add new command-line switch -d<directory> that | |
75 | will prepend <directory> to package.cpath. | |
76 | * Makefile: add -d../src to testing commands, for testing the freshly built | |
77 | libraries rather than the installed ones. | |
78 | ||
79 | 2007-12-25 Shmuel Zeigerman <shmuz@013net.net> | |
80 | * test/common_sets.lua, test/spencer_sets.lua, test/pcre_sets.lua, | |
81 | test/pcre_sets2.lua: | |
82 | move tests with NULs in subject from common_sets.lua into other set | |
83 | files. | |
84 | ||
85 | 2007-12-19 Shmuel Zeigerman <shmuz@013net.net> | |
86 | ||
87 | * Makefile: [fix] remove TRE-related parts (reported by Christian Wiese). | |
88 | * Makefile: separate PCRE and POSIX targets (suggested by Hisham Muhammad). | |
89 | * src/common.mak: express "ld" and "-shared" via variables (suggested by | |
90 | Hisham Muhammad). | |
91 | * src/algo.h: update version string to "Lrexlib 2.2.2". | |
92 | ||
93 | 2007-11-09 Shmuel Zeigerman <shmuz@013net.net> | |
94 | ||
95 | * 4 source files: refactoring (ALG_GETCFLAGS redefined). | |
96 | ||
97 | 2007-10-25 Shmuel Zeigerman <shmuz@013net.net> | |
98 | ||
99 | * algo.h: version string updated to "Lrexlib 2.2.1". | |
100 | ||
101 | 2007-09-25 Shmuel Zeigerman <shmuz@013net.net> | |
102 | ||
103 | * lpcre_f.c: added new flags from PCRE-7.4. | |
104 | ||
105 | 2007-09-20 Shmuel Zeigerman <shmuz@013net.net> | |
106 | ||
107 | * algo.h (generic_tfind): bugfix. | |
108 | ||
109 | * common_sets.lua (set_m_exec): one test added. | |
110 | ||
111 | 2007-08-29 Shmuel Zeigerman <shmuz@013net.net> | |
112 | ||
113 | * lpcre_f.c: added new flags from PCRE-7.3. | |
114 | ||
115 | 2007-06-10 Shmuel Zeigerman <shmuz@013net.net> | |
116 | ||
117 | * makefiles for Windows/MinGW: | |
118 | * .a files are not needed anymore | |
119 | * added test and install targets | |
120 | ||
121 | 2007-06-08 Shmuel Zeigerman <shmuz@013net.net> | |
122 | ||
123 | * algo_t.h: file removed; its contents moved to common.h. | |
124 | ||
125 | 2007-05-03 Shmuel Zeigerman <shmuz@013net.net> | |
126 | ||
127 | * lpcre.c (settables): removed API function. | |
128 | ||
129 | 2007-05-02 Shmuel Zeigerman <shmuz@013net.net> | |
130 | ||
131 | * algo_t.h: new file added, for making struct definitions visible to | |
132 | the file parts lying above the line #include algo.h. | |
133 | ||
134 | * lpcre.c: [API extension]: cflags may be specified by a string. | |
135 | ||
136 | 2007-04-30 Shmuel Zeigerman <shmuz@013net.net> | |
137 | ||
138 | * lpcre.c (REX_OPENLIB, OPTLOCALE, compile_regex): bugfix: "tables" | |
139 | userdata could be garbage-collected before the "regex" userdata | |
140 | that was using it. | |
141 | ||
142 | * lpcre_f.c: added a new flag from PCRE-7.1. | |
143 | ||
144 | 2007-04-21 Shmuel Zeigerman <shmuz@013net.net> | |
145 | ||
146 | * test/luatest.lua (eq): no more relying on tostring producing unique | |
147 | strings for different tables. | |
148 | ||
149 | 2007-04-20 Shmuel Zeigerman <shmuz@013net.net> | |
150 | ||
151 | * ltre.c (get_int_field, set_int_field): moved to common.c. | |
152 | ||
153 | 2007-04-19 Reuben Thomas <rrt@sc3d.org> | |
154 | ||
155 | * Add build system support for TRE on POSIX systems. | |
156 | ||
157 | 2007-04-18 Reuben Thomas <rrt@sc3d.org> | |
158 | ||
159 | * lpcre.c, lposix.c, ltre.c, algo.h: Fix some compiler warnings. | |
160 | ||
161 | 2007-04-17 Shmuel Zeigerman <shmuz@013net.net> | |
162 | ||
163 | * <all source files>: the metatable for regex userdata is the C-functions | |
164 | environment (not kept in the lua_State registry anymore). | |
165 | * lpcre.c: the metatable for 'pcre_tables' userdata is kept at index 1 | |
166 | of the C-functions environment. | |
167 | ||
168 | 2007-04-14 Shmuel Zeigerman <shmuz@013net.net> | |
169 | ||
170 | * lpcre.h (maketables): new API function (PCRE only). | |
171 | ||
172 | 2007-04-13 Shmuel Zeigerman <shmuz@013net.net> | |
173 | ||
174 | * algo.h (gmatch_iter, split_iter): bugfix: was improper detection | |
175 | of a "no advance" situation. | |
176 | * algo.h (split_iter): bugfix: if the subject ends with a separator, | |
177 | there must be an additional pass giving an empty section. | |
178 | * test/common_sets.lua: test results adjusted for bugfix in 'split'. | |
179 | * test/pcre_sets.lua: gmatch tests added (testing "retry" feature). | |
180 | ||
181 | 2007-04-12 Shmuel Zeigerman <shmuz@013net.net> | |
182 | ||
183 | * algo.h, lpcre.c (USE_RETRY): new macro. Changed gsub and gmatch | |
184 | behavior under PCRE when a "no advance" situation occurs. | |
185 | * test/pcre_sets2.lua: test results adjusted for the change in gsub | |
186 | behavior. | |
187 | ||
188 | 2007-04-11 Shmuel Zeigerman <shmuz@013net.net> | |
189 | ||
190 | * lpcre.c (settables): new API function (PCRE only). | |
191 | * test/pcre_sets.lua: locale tests added. | |
192 | ||
193 | 2007-03-19 Shmuel Zeigerman <shmuz@013net.net> | |
194 | ||
195 | * algo.h, lpcre.c, lposix.c, ltre.c: improved userdata check | |
196 | (detects bad userdata error prior to calling methods; | |
197 | prevents crashes). | |
198 | ||
199 | 2007-03-10 Shmuel Zeigerman <shmuz@013net.net> | |
200 | ||
201 | * algo.h: many functions added. | |
202 | * lpcre.c, lposix.c, ltre.c: many functions removed. | |
203 | ||
204 | 2007-03-09 Shmuel Zeigerman <shmuz@013net.net> | |
205 | ||
206 | * algo.h: new file added -- to contain the code of common algorithms. | |
207 | * It is, in fact, a C-file. | |
208 | * gsub, match, find: functions added | |
209 | ||
210 | * lpcre.c, lposix.c, ltre.c (gsub, match, find): functions removed. | |
211 | ||
212 | 2007-03-07 Shmuel Zeigerman <shmuz@013net.net> | |
213 | ||
214 | * lposix.c (REX_NSUB_BASE1): macro added, to facilitate building for | |
215 | Tom Lord's library. | |
216 | ||
217 | 2007-03-06 Shmuel Zeigerman <shmuz@013net.net> | |
218 | ||
219 | * lposix.c, ltre.c (gmatch, split, gsub): bugfixes: | |
220 | incorrect processing of patterns anchored at the beginning. | |
221 | Was cured this way: if (offset > 0) eflags |= REG_NOTBOL; | |
222 | ||
223 | * test/common_sets.lua: test cases added. | |
224 | ||
225 | 2007-03-05 Shmuel Zeigerman <shmuz@013net.net> | |
226 | ||
227 | * lpcre.c (gsub): bugfix: | |
228 | Test: { {"abcd", "\\b", "%1"}, {"abcd", 2, 2} }. | |
229 | ||
230 | * test/pcre_sets2.lua: a test case added. | |
231 | ||
232 | 2007-03-03 Shmuel Zeigerman <shmuz@013net.net> | |
233 | ||
234 | * lpcre.c, lposix.c, ltre.c (gsub): [API change]: | |
235 | gsub returns 3 values; the 3-rd is the number of replacements made. | |
236 | ||
237 | * test/*.lua: corrected tests for gsub (after API change). | |
238 | ||
239 | 2007-03-02 Shmuel Zeigerman <shmuz@013net.net> | |
240 | ||
241 | * test/common_sets.lua: added new tests for gsub (after API change). | |
242 | ||
243 | 2007-03-01 Shmuel Zeigerman <shmuz@013net.net> | |
244 | ||
245 | * lpcre.c, lposix.c, ltre.c (gsub): [API change]: | |
246 | a) 2-nd return of rep() is ignored --> API-compatible with string.gsub | |
247 | b) argument `n' can be a function --> API extension wrt string.gsub | |
248 | ||
249 | * common.h (REX_VERSION): updated to "2.2.0 beta". | |
250 | ||
251 | 2007-02-23 Shmuel Zeigerman <shmuz@013net.net> | |
252 | ||
253 | * lpcre.c, lpcre_f.c: | |
254 | * added #ifdef's to do named subpatterns only if PCRE_MAJOR >= 4, | |
255 | otherwise it wouldn't compile for PCRE 3.x [bugreport by Zhao Zhiguo]. | |
256 | * same #ifdef for Lpcre_config function. | |
257 | ||
258 | 2007-02-20 Shmuel Zeigerman <shmuz@013net.net> | |
259 | ||
260 | * ltre.c (aexec, atfind, have_backrefs, have_approx): new methods. | |
261 | aexec --> exec + approximate matching; | |
262 | atfind --> tfind + approximate matching; | |
263 | have_backrefs --> binding of tre_have_backrefs; | |
264 | have_approx --> binding of tre_have_approx; | |
265 | ||
266 | 2007-02-18 Shmuel Zeigerman <shmuz@013net.net> | |
267 | ||
268 | * ltre.c: all uses of regexec replaced by regnexec. | |
269 | * test/common_sets.lua: added tests with nuls in the subject. | |
270 | ||
271 | 2007-02-17 Shmuel Zeigerman <shmuz@013net.net> | |
272 | ||
273 | * ltre.c: new file added (started the binding of the TRE regex library). | |
274 | * test/posix_sets.lua: added tests with nuls in the subject or/and | |
275 | the pattern. | |
276 | ||
277 | 2007-02-12 Shmuel Zeigerman <shmuz@013net.net> | |
278 | ||
279 | * all sources (match, find, tfind, exec, dfa_exec): [API change] | |
280 | in case of ordinary non-match, only a nil is returned; | |
281 | other non-match cases generate an error. | |
282 | ||
283 | * test suite and the manual: updated to reflect the above API change. | |
284 | ||
285 | * all sources (gmatch, split, gsub): [bugfix] | |
286 | if during repeated matching pcre_exec/regexec returns a value that | |
287 | means neither match nor no-match, then an error is generated. | |
288 | ||
289 | * common.h (REX_VERSION): updated to "2.1.0". | |
290 | ||
291 | 2007-01-29 Shmuel Zeigerman <shmuz@013net.net> | |
292 | ||
293 | * lposix.c (checkarg_find_f): [bugfix] incorrect default for eflags. | |
294 | * lposix.c (generic_find): [bugfix] dereferencing uninitialized pointer. | |
295 | * common.h (REX_VERSION): updated to "2.0.2". | |
296 | ||
297 | 2007-01-27 Reuben Thomas <rrt@sc3d.org> | |
298 | ||
299 | * lpcre.c, lposix.c (gsub): [bugfix] | |
300 | - it was incorrectly assumed that the value on Lua stack was a string; | |
301 | - luaL_error was used where lua_error was more appropriate; | |
302 | ||
303 | 2007-01-18 Shmuel Zeigerman <shmuz@013net.net> | |
304 | ||
305 | * lpcre.c (gsub): [API change undone]. | |
306 | * all source files: refactoring. | |
307 | ||
308 | 2007-01-14 Shmuel Zeigerman <shmuz@013net.net> | |
309 | ||
310 | * lpcre.c (gsub): [API change]: | |
311 | a) 2-nd return of rep() is ignored --> API-compatible with string.gsub | |
312 | b) argument `n' can be a function --> API extension wrt string.gsub | |
313 | ||
314 | 2007-01-13 Shmuel Zeigerman <shmuz@013net.net> | |
315 | ||
316 | * lpcre.c, lposix.c: refactoring. | |
317 | * common.c, common.h: refactoring. | |
318 | * common.h (REX_VERSION): a new #define. | |
319 | * lpcre.c, lposix.c (REX_OPENLIB): using REX_VERSION. | |
320 | ||
321 | 2007-01-12 Shmuel Zeigerman <shmuz@013net.net> | |
322 | ||
323 | * common.h, common.c: a nasty bug fixed. | |
324 | * lpcre.c, lposix.c: version updated to 2.0.1. | |
325 | ||
326 | 2007-01-10 Shmuel Zeigerman <shmuz@013net.net> | |
327 | ||
328 | * lpcre.c, lposix.c: refactoring. | |
329 | ||
330 | 2007-01-08 Shmuel Zeigerman <shmuz@013net.net> | |
331 | ||
332 | * lpcre.c, lposix.c: refactoring. | |
333 | ||
334 | 2007-01-04 Reuben Thomas <rrt@sc3d.org> | |
335 | ||
336 | * Add a top-level Makefile with all, clean and test targets. | |
337 | * Split src/*.mak common parts into src/common.mak. | |
338 | ||
339 | 2007-01-04 Shmuel Zeigerman <shmuz@013net.net> | |
340 | ||
341 | * lpcre.c (Lpcre_gsub): unnecessary (though harmless) assignment removed. | |
342 | * test/runtest.lua: extended the command-line interface. | |
343 | * most files: the copyright notice changed to reference LICENSE file. | |
344 | ||
345 | 2007-01-02 Shmuel Zeigerman <shmuz@013net.net> | |
346 | ||
347 | * common.c (CheckFunction, OptFunction): functions removed. | |
348 | * lpcre.c (put_integer): function removed. | |
349 | ||
350 | 2006-12-31 Shmuel Zeigerman <shmuz@013net.net> | |
351 | ||
352 | * common.c (udata_tostring): function removed. | |
353 | * lpcre.c (Lpcre_tostring): added handling of deleted userdatum. | |
354 | * lposix.c (Posix_tostring): added handling of deleted userdatum. | |
355 | * lpcre.c: [API change] method `exec' now supports "named subpatterns". | |
356 | * test/all_test.lua: renamed to runtest.lua. | |
357 | ||
358 | 2006-12-30 Shmuel Zeigerman <shmuz@013net.net> | |
359 | ||
360 | * lpcre.c: [API change] `versionPCRE' renamed to `version'. | |
361 | ||
362 | 2006-12-29 Shmuel Zeigerman <shmuz@013net.net> | |
363 | ||
364 | * lpcre.c: [API change] removed support of PCRE callout. | |
365 | * test/pcre_sets.lua: removed testing of PCRE callout. | |
366 | ||
367 | 2006-12-27 Shmuel Zeigerman <shmuz@013net.net> | |
368 | ||
369 | * lpcre_f.c (config): [API change] the function accepts one optional | |
370 | argument (a table), like the `flags' function. | |
371 | ||
372 | 2006-12-26 Shmuel Zeigerman <shmuz@013net.net> | |
373 | ||
374 | * lpcre.c, lposix.c (gsub): [API change] method became function. | |
375 | * test/*.lua: modifying tests to reflect methods becoming functions. | |
376 | * lua/rex.lua: file deleted. | |
377 | * lpcre.c (luaopen_rex_pcre): changed the condition for validating | |
378 | run-time PCRE version. | |
379 | * lpcre_f.c: added new flags for support of PCRE 7.0. | |
380 | * lpcre.c, lposix.c (checkarg_gsub): if the 3-rd argument is of type | |
381 | "number" then it is converted to string (as in string.gsub). | |
382 | ||
383 | 2006-12-25 Shmuel Zeigerman <shmuz@013net.net> | |
384 | ||
385 | * lpcre.c, lposix.c (match, find, gmatch, split): [API change] | |
386 | methods become functions. | |
387 | ||
388 | 2006-12-23 Shmuel Zeigerman <shmuz@013net.net> | |
389 | ||
390 | * test/*.lua: refactoring. | |
391 | ||
392 | 2006-12-22 Shmuel Zeigerman <shmuz@013net.net> | |
393 | ||
394 | * lpcre.c, lposix.c (tfind and exec): [API change] | |
395 | the return value of the underlying pcre_exec/regexec call | |
396 | is not added in the case of successful match. | |
397 | * lpcre.c, lposix.c (gtfind): [API change] method removed. | |
398 | * lpcre.c, lposix.c: Lrexlib 2.0 beta -> Lrexlib 2.0. | |
399 | * test/luatest.lua (eq): bugfix. | |
400 | * lua/*.lua: deleted all lua files except for rex.lua. | |
401 | * lua/rex.lua: fully rewritten; now contains wrappers for all methods. | |
402 | ||
403 | 2006-12-21 Shmuel Zeigerman <shmuz@013net.net> | |
404 | ||
405 | * test/*.lua: added tests for `split' method. | |
406 | * test/luatest.lua (print_results): function added. | |
407 | * test/common_sets.lua: file added. | |
408 | ||
409 | 2006-12-19 Shmuel Zeigerman <shmuz@013net.net> | |
410 | ||
411 | * lpcre.lua: `split' method added. | |
412 | * lposix.lua: `split' method added. | |
413 | * test/framework.lua: renamed to luatest.lua. | |
414 | ||
415 | 2006-12-15 Shmuel Zeigerman <shmuz@013net.net> | |
416 | ||
417 | * test/*.lua: refactoring. | |
418 | ||
419 | 2006-12-11 Shmuel Zeigerman <shmuz@013net.net> | |
420 | ||
421 | * test/*.lua: refactoring; deleting files; adding new files. | |
422 | ||
423 | 2006-12-10 Shmuel Zeigerman <shmuz@013net.net> | |
424 | ||
425 | * lpcre.c, lposix.c: gmatch bug fixed. Test case added. | |
426 | * lpcre.c, lposix.c: gtfind bug fixed. Test case added. | |
427 | ||
428 | 2006-12-09 Shmuel Zeigerman <shmuz@013net.net> | |
429 | ||
430 | * lua/gsub_test.lua, gsub_tstpsx.lua: 2 files deleted. | |
431 | * lua/posix_sets.lua, pcre_sets.lua, framework.lua, all_test.lua: | |
432 | 4 files added. | |
433 | * lpcre.c, lposix.c, common.c, common.h: | |
434 | an API alteration in gsub and gtfind methods: if a non-positive number | |
435 | is supplied as the 'n' parameter, then no iterations are done. | |
436 | Test cases added to *.lua test files. | |
437 | ||
438 | 2006-12-07 Shmuel Zeigerman <shmuz@013net.net> | |
439 | ||
440 | * lpcre.c, lposix.c (gsub): if parameter rep is a function, | |
441 | and its 2-nd return value (if present) is a string "break", | |
442 | then gsub immediately returns. | |
443 | * test/*.lua: refactoring. | |
444 | ||
445 | 2006-12-06 Shmuel Zeigerman <shmuz@013net.net> | |
446 | ||
447 | * lpcre.c, lposix.c: refactoring. | |
448 | ||
449 | 2006-12-05 Shmuel Zeigerman <shmuz@013net.net> | |
450 | ||
451 | * lposix.c: cosmetics. | |
452 | ||
453 | 2006-12-04 Shmuel Zeigerman <shmuz@013net.net> | |
454 | ||
455 | * lpcre.c, lposix.c: introduced new macros: CAP_BEG, CAP_END and CAP_LEN. | |
456 | * lpcre.c, lpcre_f.c, lposix.c: refactoring. | |
457 | ||
458 | 2006-12-03 Shmuel Zeigerman <shmuz@013net.net> | |
459 | ||
460 | * lpcre.c (Lpcre_gsub_func): an improvement. | |
461 | * lposix.c (posix_gsub_func): an improvement. | |
462 | * lpcre.c, lposix.c (gmatch, match, find): functions removed. | |
463 | * lpcre.c, lposix.c: method tgfind renamed to gtfind. | |
464 | * lpcre.c, lposix.c: gsub function became method. | |
465 | * test/posix_test.lua, pcre_test.lua: removed tests for removed functions. | |
466 | ||
467 | 2006-12-02 Shmuel Zeigerman <shmuz@013net.net> | |
468 | ||
469 | * common.c, common.h (TBuffer): moved here from lpcre.c. | |
470 | * common.c, common.h (TFreeList): helper class created. | |
471 | * lpcre.c (Lpcre_gsub_func): many changes. | |
472 | * lposix.c (posix_gsub_func): added function gsub. | |
473 | * test/gsub_test.lua: added testing for the new written-in-C gsub. | |
474 | * test/gsub_tstpsx.lua: file added. | |
475 | * test/rex_.lua: file deleted. | |
476 | ||
477 | 2006-12-01 Shmuel Zeigerman <shmuz@013net.net> | |
478 | ||
479 | * lpcre.c (TBuffer): helper class created. | |
480 | * lpcre.c (Lpcre_gsub_func): many changes. | |
481 | ||
482 | 2006-11-30 Shmuel Zeigerman <shmuz@013net.net> | |
483 | ||
484 | * lpcre.c (TExecData): struct renamed to TCallout. | |
485 | * lpcre.c (LpcreSetExecData): function renamed to SetupCallout. | |
486 | * lpcre.c (Lpcre_gsub_func): added function gsub. | |
487 | ||
488 | 2006-11-29 Shmuel Zeigerman <shmuz@013net.net> | |
489 | ||
490 | * test/*.lua: refactoring. | |
491 | * lpcre.c, lposix.c: refactoring. | |
492 | * lpcre.c, lposix.c (oldmatch): renamed to tfind. | |
493 | * lpcre.c, lposix.c (oldgmatch): renamed to tgfind. | |
494 | * */*.lua: renamed: oldmatch -> tfind; oldgmatch -> tgfind. | |
495 | ||
496 | 2006-11-28 Shmuel Zeigerman <shmuz@013net.net> | |
497 | ||
498 | * lpcre.c (Lpcre_dfa_exec): one Lmalloc call instead of two. | |
499 | ||
500 | 2006-11-27 Shmuel Zeigerman <shmuz@013net.net> | |
501 | ||
502 | * common.c (plainfind_func): rewritten to not use memicmp | |
503 | * test/*.lua: every test returns number of failures | |
504 | ||
505 | 2006-11-26 Shmuel Zeigerman <shmuz@013net.net> | |
506 | ||
507 | * lpcre.c, lposix.c: all Check_arg_* functions renamed to Checkarg_* | |
508 | * lpcre.c (Lpcre_dfa_exec): added 2 arguments to dfa_exec | |
509 | * common.h (DIM): macro removed | |
510 | * test/pcre_test.lua: tests for dfa_exec method added | |
511 | ||
512 | 2006-11-25 Shmuel Zeigerman <shmuz@013net.net> | |
513 | ||
514 | * lpcre.c, lposix.c: alpha -> beta. | |
515 | * lpcre_f.c: file added (was: part of lpcre.c). | |
516 | * rex_pcre.mak: updated due to the new file lpcre_f.c | |
517 | * *.mak: version updated to 2.0 | |
518 | * test/all_test.lua: file added. | |
519 | * test/posix_test.lua, test/pcre_test.lua: made modules. | |
520 | * test/posix_test.lua: tests added. | |
521 | * test/*.lua: refactoring. | |
522 | * lua/generic_gsub.lua: refactoring. | |
523 | ||
524 | 2006-11-23 Shmuel Zeigerman <shmuz@013net.net> | |
525 | ||
526 | * test/*.lua: refactoring. | |
527 | * lua/rex.lua, lua/rex_.lua (gsub): 6th and 7th arguments swapped. | |
528 | ||
529 | 2006-11-22 Shmuel Zeigerman <shmuz@013net.net> | |
530 | ||
531 | * lpcre.c (Check_arg_findmatch_func): 5th and 6th arguments swapped. | |
532 | * lpcre.c (Check_arg_gmatch_func): 4th and 5th arguments swapped. | |
533 | * test/posix_test.lua: file added. | |
534 | * lposix.c: 2 bugs fixed. | |
535 | ||
536 | 2006-11-21 Shmuel Zeigerman <shmuz@013net.net> | |
537 | ||
538 | * test/pcre_test.lua: "named subpatterns" tests added. | |
539 | ||
540 | 2006-11-20 Shmuel Zeigerman <shmuz@013net.net> | |
541 | ||
542 | * common.c, common.h (plainfind_func): function added. | |
543 | * lpcre.c, lposix.c (rex.plainfind): new function (from Lua side). | |
544 | * test/framework.lua: file added. | |
545 | * test/pcre_test.lua: file added. | |
546 | ||
547 | 2006-11-19 Shmuel Zeigerman <shmuz@013net.net> | |
548 | ||
549 | * lpcre.c (Lpcre_config): new function (pcre.config from Lua side). | |
550 | * lpcre.c: callout handling improved. | |
551 | ||
552 | 2006-11-18 Shmuel Zeigerman <shmuz@013net.net> | |
553 | ||
554 | * lpcre.c, lposix.c: old 'gmatch' method put back; renamed into 'oldgmatch'. | |
555 | ||
556 | 2006-11-17 Shmuel Zeigerman <shmuz@013net.net> | |
557 | ||
558 | * windows/bcc32/make_bcc.mak: deleted -DCOMPAT51, added -D$(CMDLINE) | |
559 | ||
560 | 2006-11-16 Shmuel Zeigerman <shmuz@013net.net> | |
561 | ||
562 | * gsub_test.lua (PatternLua2Pcre): function renamed into 'pat2pcre'. | |
563 | * lua/pat2pcre.lua: file added (was part of gsub_test.lua). | |
564 | ||
565 | 2006-11-15 Shmuel Zeigerman <shmuz@013net.net> | |
566 | ||
567 | * lpcre.c, lposix.c: Lua API has changed: | |
568 | * 'match' method renamed to 'oldmatch' | |
569 | * added (as much as possible) Lua string library API compatible: | |
570 | 'find', 'match' and 'gmatch' - each one being a function/method pair. | |
571 | * lpcre.c (newPCRE, flagsPCRE): functions removed. | |
572 | * lposix.c (newPOSIX, flagsPOSIX): functions removed. | |
573 | * lpcre.c, lposix.c: added literal rex._VERSION. | |
574 | * rex.lua: deleted everything except gsub. | |
575 | * rex.lua (gsub): rex.oldmatch is used instead of rex.match. | |
576 | * generic_gsub.lua: rex.oldmatch is used instead of rex.match. | |
577 | * test/test1.lua: file added | |
578 | ||
579 | 2006-11-12 Shmuel Zeigerman <shmuz@013net.net> | |
580 | ||
581 | * rex.lua (r:gmatch): metamethod added. | |
582 | * lpcre.c (Lpcre_maketables): function API simplified. | |
583 | * lpcre.c (Lpcre_getargs): function removed. | |
584 | * lpcre.c (LpcreGetExecParams): renamed from LpcreProcessExecParams. | |
585 | ||
586 | 2006-11-09 Shmuel Zeigerman <shmuz@013net.net> | |
587 | ||
588 | * lua/bit.lua: file removed. | |
589 | ||
590 | 2006-11-05 Shmuel Zeigerman <shmuz@013net.net> | |
591 | ||
592 | * rex.lua (gsub): a few structural optimizations. | |
593 | * rex.lua (gmatch): a bugfix. | |
594 | * rex.lua: tests deleted from the file. | |
595 | * gsub.lua: renamed into 'generic_gsub.lua'; made a module. | |
596 | * new_gsub.lua: file deleted. | |
597 | * lua/bit.lua: file added. | |
598 | ||
599 | 2006-11-04 Reuben Thomas <rrt@sc3d.org> | |
600 | ||
601 | * rex.lua (gmatch): function rewritten to be compatible with | |
602 | string.gmatch. | |
603 | * find.lua: file deleted. | |
604 | * lpcre.c (Lpcre_gmatch): function deleted. | |
605 | * lposix.c (posix_gmatch): function deleted. | |
606 | ||
607 | 2006-11-04 Shmuel Zeigerman <shmuz@013net.net> | |
608 | ||
609 | * gsub.lua, new_gsub.lua, gsub_test.lua, rex.lua: bugfixes. | |
610 | ||
611 | 2006-10-18 Shmuel Zeigerman <shmuz@013net.net> | |
612 | ||
613 | * [Windows] DLL builds need lua5.1.dll rather than lua51.dll. | |
614 | ||
615 | 2006-10-02 Shmuel Zeigerman <shmuz@013net.net> | |
616 | ||
617 | * Support of Lua 5.0 was dropped. | |
618 | ||
619 | 2006-09-03 Shmuel Zeigerman <shmuz@013net.net> | |
620 | ||
621 | * common.h (REX_LIB_API): renamed into REX_API. | |
622 | * common.h (flags_pair): renamed into flag_pair. | |
623 | * common.h, lpcre.h, lposix.h (REX_REGISTER): renamed into rex_register. | |
624 | * lpcre.h (luaopen_rex_pcre): error message text changed. | |
625 | * lposix.c (posix_handle, posix_typename): | |
626 | string literals are prefixed by $(REX_LIBNAME). | |
627 | [ This makes it possible to use multiple rex_posix libraries | |
628 | at a time in one lua_State. ] | |
629 | * lpcre.c (pcre_handle, pcre_typename): | |
630 | string literals are prefixed by $(REX_LIBNAME). | |
631 | [ This makes it possible to use multiple rex_pcre libraries | |
632 | at a time in one lua_State. ] | |
633 | * added directory test/Spencer (containing test.lua). | |
634 | * test/Spencer/test.lua: file returns a function. | |
635 | ||
636 | 2006-08-27 Shmuel Zeigerman <shmuz@013net.net> | |
637 | ||
638 | * (local) merge with the version put into CVS by R.Thomas. | |
639 | * common.h: added conditional #define's for lua_pushinteger and | |
640 | lua_tointeger (needed to compile with Lua 5.0). | |
641 | ||
642 | 2006-08-18 Shmuel Zeigerman <shmuz@013net.net> | |
643 | ||
644 | * common.c, lpcre.c, lposix.c: lua_pushnumber replaced with | |
645 | lua_pushinteger where appropriate (in many places). | |
646 | * lpcre.c (put_number): function renamed into put_integer. | |
647 | ||
648 | 2006-06-17 Shmuel Zeigerman <shmuz@013net.net> | |
649 | ||
650 | * lpcre.c (TPcreExecParam): `use_callout' struct member was eliminated. | |
651 | Instead, a special value of function reference (LUA_NOREF) is used. | |
652 | ||
653 | 2006-04-01 Reuben Thomas <rrt@sc3d.org> | |
654 | ||
655 | * lpcre.c, common.c: Remove trailing whitespace. | |
656 | * lpcre.c (Lpcre_gmatch): Remove unnecessary limit variable. | |
657 | ||
658 | 2006-02-17 Shmuel Zeigerman <shmuz@013net.net> | |
659 | ||
660 | * common.c, common.h (L_lua_error): function deleted. | |
661 | * common.c, lpcre.c, lposix.c: luaL_error used in place of L_lua_error. | |
662 | * lposix.c (posix2): struct renamed into TPosix. | |
663 | * lposix.c (posix_comp): lua_newuserdata() used instead of Lmalloc(). | |
664 | ||
665 | 2005-12-26 Shmuel Zeigerman <shmuz@013net.net> | |
666 | ||
667 | * Separate makefiles for POSIX and PCRE. No config file. | |
668 | ||
669 | 2005-11-28 Shmuel Zeigerman <shmuz@013net.net> | |
670 | ||
671 | * common.h (REXLIB_API): macro renamed into REX_LIB_API. | |
672 | * lposix.c (LREXLIB_POSIX_EXT): macro renamed into REX_POSIX_EXT. | |
673 | ||
674 | 2005-11-26 Shmuel Zeigerman <shmuz@013net.net> | |
675 | ||
676 | * common.h (LUAL_REGISTER): macro renamed into REX_REGISTER. | |
677 | * lpcre.c (LUAOPEN_LIB): macro renamed into REX_OPENLIB. | |
678 | * lpcre.c (LIBNAME): macro renamed into REX_LIBNAME. | |
679 | ||
680 | 2005-11-15 Shmuel Zeigerman <shmuz@013net.net> | |
681 | ||
682 | * common.c, common.h, lposix.c, lpcre.c: new files (lrexlib.c was | |
683 | split); POSIX and PCRE parts now live in their own | |
684 | separate files. | |
685 | * common.h (REXLIB_API): new macro. | |
686 | * lposix.c (LREXLIB_POSIX): macro removed. | |
687 | * lposix.c (LUAOPEN_LIB, LIBNAME): new macros. | |
688 | * lposix.c (rexlib): Lua-side functions flagsPOSIX and newPOSIX | |
689 | now have aliases (flags and new, respectively). | |
690 | * lpcre.c (LREXLIB_PCRE): macro removed. | |
691 | * lpcre.c (LUAOPEN_LIB, LIBNAME): new macros. | |
692 | * lpcre.c (rexlib): Lua-side functions flagsPCRE and newPCRE now | |
693 | have aliases (flags and new, respectively). | |
694 | * lpcre.c (Lpcre_vers): function renamed into Lpcre_version. | |
695 | ||
696 | 2005-11-12 Shmuel Zeigerman <shmuz@013net.net> | |
697 | ||
698 | * lrexlib.c (LUAL_REGISTER): new macro. | |
699 | ||
700 | 2005-11-10 Shmuel Zeigerman <shmuz@013net.net> | |
701 | ||
702 | * lrexlib.c (posix_match_generic): return (on Lua-side) an | |
703 | additional value (the return code of regexec). | |
704 | * lrexlib.c (Lpcre_match_generic): return (on Lua-side) an | |
705 | additional value (the return code of pcre_exec). | |
706 | * lrexlib.c (Lpcre_dfa_exec): return (on Lua-side) an additional | |
707 | value (the return code of pcre_dfa_exec). | |
708 | ||
709 | 2005-10-29 Shmuel Zeigerman <shmuz@013net.net> | |
710 | ||
711 | * lrexlib.c (get_flags): the function now accepts one parameter | |
712 | from the Lua stack (a table). Was: no parameters. (This | |
713 | affects the Lua-side functions: flagsPOSIX and | |
714 | flagsPCRE). | |
715 | * lrexlib.c (LREXLIB_POSIX_EXT): it's now possible to force-define | |
716 | this macro at compile time, bypassing the automatic | |
717 | detection. | |
718 | * lrexlib.c: added new POSIX flags (error values). | |
719 | * lrexlib.c (LpcreProcessParams): renamed into | |
720 | LpcreProcessExecParams. | |
721 | * lrexlib.c: added all missing PCRE flags starting from PCRE | |
722 | version 4. | |
723 | ||
724 | 2005-10-25 Shmuel Zeigerman <shmuz@013net.net> | |
725 | ||
726 | * lrexlib.c (DIM): new macro. | |
727 | * lrexlib.c (pcre2): renamed into TPcre. | |
728 | * lrexlib.c (TPcreExecParam): new struct. | |
729 | * lrexlib.c (LpcreProcessParams): new function. | |
730 | * lrexlib.c (Lpcre_dfa_exec): new function. Lua-side: dfa_exec. | |
731 | * lrexlib.c (Lpcre_dfa_restart): new function. Lua-side: dfa_restart. | |
732 | ||
733 | 2005-10-23 Shmuel Zeigerman <shmuz@013net.net> | |
734 | ||
735 | * lrexlib.c (TPcreCalloutData): new struct. | |
736 | * lrexlib.c (Lpcre_callout): new function. | |
737 | * lrexlib.c (Lpcre_match_generic): PCRE callout support added. | |
738 | * lrexlib.c: added new PCRE flags (from PCRE versions 5 and 6). | |
739 | * lrexlib.c (put_number): new helper function. | |
740 | ||
741 | 2005-05-27 Shmuel Zeigerman <shmuz@013net.net> | |
742 | ||
743 | * lrexlib.c: updated to work with Compat-5.1. | |
744 | ||
745 | 2004-12-18 Shmuel Zeigerman <shmuz@013net.net> | |
746 | ||
747 | * lrexlib.c (regex_tostring): error handling added. | |
748 | * lrexlib.c (regex_tostring): renamed into udata_tostring. | |
749 | * lrexlib.c (regex_get_flags): renamed into get_flags. | |
750 | * lrexlib.c (Lpcre_comp): pattern offset included in error message. | |
751 | * lrexlib.c (Lpcre_comp): error check added after pcre_study call. | |
752 | * lrexlib.c (Lpcre_push_substrings): eliminated inserting nil | |
753 | values into the matches table (not a bug, redundancy). | |
754 | * lrexlib.c (Lpcre_push_substrings): lua_rawset call instead of | |
755 | lua_settable. | |
756 | * lrexlib.c: all stack buffers used for message formatting changed | |
757 | in size from 128 to 256 bytes. | |
758 | ||
759 | 2004-11-17 Nick Gammon <nick@gammon.com.au> | |
760 | ||
761 | * lrexlib.c (pcre2): new member pcre2.extra. | |
762 | * lrexlib.c (Lpcre_comp): pcre_study call added. | |
763 | * lrexlib.c (Lpcre_push_matches, Lpcre_push_substrings, | |
764 | Lpcre_push_offsets): function signatures changed. | |
765 | * lrexlib.c (Lpcre_push_substrings): added support for "named | |
766 | subpatterns". | |
767 | ||
768 | 2004-09-15 Shmuel Zeigerman <shmuz@013net.net> | |
769 | ||
770 | * gsub.lua (is_odd): Function removed; math.mod used instead. | |
771 | ||
772 | 2004-08-24 Shmuel Zeigerman <shmuz@013net.net> | |
773 | ||
774 | * ChangeLog: File added. | |
775 | * NEWS: File added. | |
776 | ||
777 | 2004-08-12 Shmuel Zeigerman <shmuz@013net.net> | |
778 | ||
779 | * lrexlib.c (posix_exec, Lpcre_exec): New C functions, that | |
780 | correspond to the new lua function 'r:exec'. | |
781 | * lrexlib.c (posix_tostring, Lpcre_tostring): New C functions, | |
782 | that correspond to the new lua function 'r:__tostring'. | |
783 | * lrexlib.c: A table returned by r:match() as its 3rd result has | |
784 | no "n" index set anymore. | |
785 | * lrexlib.c (LREXLIB_POSIX_EXT): New macro. | |
786 | * lrexlib.c (PCRE_LOCALE_SUPPORT): Macro removed. | |
787 | * lrexlib.c (posix_push_matches, Lpcre_push_matches): Functions | |
788 | became typedef's. | |
789 | * lrexlib.c (posix_match_generic, Lpcre_match_generic) | |
790 | (posix_push_substrings, Lpcre_push_substrings) | |
791 | (posix_push_offsets, Lpcre_push_offsets): New functions. | |
792 | * gsub.lua: File added. | |
793 | ||
794 | 2004-08-11 Reuben Thomas <rrt@sc3d.org> | |
795 | ||
796 | * config: Many changes. | |
797 | * Makefile: Many changes. | |
798 | ||
799 | 2004-08-11 Shmuel Zeigerman <shmuz@013net.net> | |
800 | ||
801 | * config: File added + many changes. | |
802 | * Makefile: Many changes. | |
803 | ||
804 | 2004-07-15 Shmuel Zeigerman <shmuz@013net.net> | |
805 | ||
806 | * lrexlib.c (posix_get_flags, Lpcre_get_flags, Lpcre_vers): New C | |
807 | functions, that correspond to new lua functions | |
808 | 'flagsPOSIX', 'flagsPCRE', 'versionPCRE'. | |
809 | * lrexlib.c: Lua functions 'newPCRE' and 'newPOSIX' accept an | |
810 | optional 2nd argument ("compilation flags"). | |
811 | * lrexlib.c: Lua function 'newPCRE' accepts an optional 3rd | |
812 | argument ("locale") - but only if the macro | |
813 | PCRE_LOCALE_SUPPORT was defined at the compilation time. | |
814 | * lrexlib.c: Lua function 'r:match' accepts optional 2nd and 3rd | |
815 | arguments ("startoffset" and "execution flags"). | |
816 | * lrexlib.c: Lua function 'r:gmatch' accepts an optional 2nd | |
817 | argument ("execution flags"). | |
818 | * lrexlib.c (posix_gmatch, Lpcre_gmatch): If a user-defined lua | |
819 | function passed as the 2nd parameter returns true value, | |
820 | then lua function 'r:gmatch' returns. | |
821 | * lrexlib.c (posix_push_matches, Lpcre_push_matches): The table of | |
822 | substring matches contains false in the positions | |
823 | correspondent to non-matched subpatterns. | |
824 | * lrexlib.c: Static functions that began with 'pcre' were renamed | |
825 | to begin with 'Lpcre'. | |
826 | * lrexlib.c (posix_getargs, Lpcre_getargs): | |
827 | Fixed - allocated memory was not freed. | |
828 | Fixed - removed redundant calls of lua_checkstack. | |
829 | * lrexlib.c: Allocation/freeing memory moved from matching | |
830 | operations to pattern-compiling and garbage-collecting | |
831 | ones. | |
832 | * lrexlib.c (posix_match, posix_gmatch, Lpcre_match, Lpcre_gmatch): | |
833 | luaL_checkudata is used in checking of userdata-type | |
834 | parameter. | |
835 | * lrexlib.c: Removed conditional compilation controlled by the | |
836 | macro REG_BASIC. | |
837 | * lrexlib.c (posix_comp): The following invalid ANSI C89 code: | |
838 | size_t sz = <expression>; char errbuf[sz]; | |
839 | made valid with the use of malloc/free. | |
840 | * lrexlib.c (Lpcre_gmatch): Keeping 'subject' and 'length' | |
841 | unchanged while updating 'startoffset'. | |
842 | * lbitlib.c: File removed. | |
843 | * Makefile: Removed parts related to lbitlib.c. | |
844 | * make_bcc.mak: File added. |
0 | License of Lrexlib release | |
1 | -------------------------- | |
2 | ||
3 | Copyright (C) Reuben Thomas 2000-2020 | |
4 | Copyright (C) Shmuel Zeigerman 2004-2020 | |
5 | ||
6 | Permission is hereby granted, free of charge, to any person | |
7 | obtaining a copy of this software and associated | |
8 | documentation files (the "Software"), to deal in the | |
9 | Software without restriction, including without limitation | |
10 | the rights to use, copy, modify, merge, publish, distribute, | |
11 | sublicense, and/or sell copies of the Software, and to | |
12 | permit persons to whom the Software is furnished to do so, | |
13 | subject to the following conditions: | |
14 | ||
15 | The above copyright notice and this permission notice shall | |
16 | be included in all copies or substantial portions of the | |
17 | Software. | |
18 | ||
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY | |
20 | KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE | |
21 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR | |
22 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS | |
23 | OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
24 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | |
25 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
26 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
0 | # Makefile for lrexlib | |
1 | ||
2 | VERSION = 2.9.1 | |
3 | PROJECT = lrexlib | |
4 | PROJECT_VERSIONED = $(PROJECT)-$(VERSION) | |
5 | ||
6 | # Commands | |
7 | LUA = lua | |
8 | LUAROCKS = luarocks | |
9 | CP = cp -a | |
10 | RM = rm -f | |
11 | RST2HTML = rst2html | |
12 | REGNAMES = gnu pcre pcre2 posix oniguruma tre | |
13 | LUAROCKS_COMMAND = make | |
14 | ||
15 | ||
16 | .SUFFIXES: .txt .html | |
17 | ||
18 | HTML = doc/index.html doc/manual.html | |
19 | ||
20 | .txt.html: | |
21 | $(RST2HTML) --stylesheet-path=doc/lrexlib.css --link-stylesheet --initial-header-level=2 --date --time $< $@ | |
22 | ||
23 | build: | |
24 | $(MAKE) install LUAROCKS="$(LUAROCKS) --tree=luarocks" | |
25 | ||
26 | install: rockspecs | |
27 | for i in *.rockspec; do \ | |
28 | $(LUAROCKS) $(LUAROCKS_COMMAND) $$i; \ | |
29 | done | |
30 | ||
31 | rockspecs: | |
32 | rm -f *.rockspec | |
33 | $(LUA) mkrockspecs.lua $(PROJECT) $(VERSION) | |
34 | ||
35 | doc/index.txt: README.rst | |
36 | $(CP) $< $@ | |
37 | ||
38 | check: build | |
39 | for i in $(REGNAMES); do \ | |
40 | LUA_PATH="test/?.lua;$(LUA_PATH);" $(LUA) test/runtest.lua -dsrc/$$i $$i; \ | |
41 | done | |
42 | ||
43 | clean: | |
44 | $(RM) $(HTML) doc/index.txt *.rockspec | |
45 | ||
46 | release: check | |
47 | agrep -d 'Release' $(VERSION) NEWS | tail -n +3 | head -n -2 > release-notes && \ | |
48 | git diff --exit-code && \ | |
49 | git tag -a -m "Release tag" rel-`echo $(VERSION) | sed -e 's/\./-/g'` && \ | |
50 | git push && git push --tags && \ | |
51 | $(MAKE) build LUAROCKS_COMMAND=build && \ | |
52 | woger lua package=$(PROJECT) package_name=$(PROJECT) version=$(VERSION) description="Lua binding for regex libraries" notes=release-notes home="`$(LUA) -e'version="'$(VERSION)'"; flavour="none"; t = require "rockspecs"; print(t.default.description.homepage)'`" | |
53 | rm -f release-notes |
0 | 2020-08-07 Release 2.9.1 | |
1 | ||
2 | * Add Lua 5.4 support | |
3 | ||
4 | 2017-11-07 Release 2.9.0 | |
5 | ||
6 | * Add PCRE2 support. | |
7 | ||
8 | 2015-02-26 Release 2.8.0 | |
9 | ||
10 | * Add Lua 5.3 support | |
11 | * No longer return empty matches adjacent to previous non-empty match. | |
12 | ||
13 | 2013-01-08 Release 2.7.2 | |
14 | ||
15 | * Fixed the use of alternative allocators, and a memory leak. | |
16 | * Simplify the build system further. | |
17 | * Change rockspec to build from git now that github no longer | |
18 | supports downloads. | |
19 | ||
20 | 2012-10-18 Release 2.7.1 | |
21 | ||
22 | * Fixed Lua 5.1 compatibility, broken in 2.7.0. | |
23 | * Added ability to specify no replacement to gsub with a nil or | |
24 | false replacement argument. | |
25 | ||
26 | 2012-10-04 Release 2.7.0 | |
27 | ||
28 | * Added support for searching raw memory buffers (e.g. made with | |
29 | alien). | |
30 | * Fixed possible invalid code generation in C (thanks, Michael | |
31 | Tautschnig). | |
32 | * Generate LuaRock rockspecs for all modules. | |
33 | * Greatly simplify UNIX build system, relying on LuaRocks. | |
34 | * Allow POSIX REG_STARTEND to be used on any system supporting it. | |
35 | * Add a test set for POSIX regex engine (thanks, Enrico Tassi). | |
36 | * Simplify some code. | |
37 | * Always use Lua state memory allocator. | |
38 | ||
39 | 2012-04-13 Release 2.6.0 | |
40 | ||
41 | * Added support for Lua 5.2. | |
42 | ||
43 | 2010-12-15 Release 2.5.3 | |
44 | ||
45 | * Bug fix to rex.split. | |
46 | * Support for new flags in PCRE 8.11. | |
47 | ||
48 | 2010-11-10 Release 2.5.2 | |
49 | ||
50 | * Important bug fix (thanks to Enrico Tassi for the report) | |
51 | affecting platforms with certain alignment requirements. | |
52 | * Improved build system for Windows. | |
53 | * Minor improvements to the manual. | |
54 | ||
55 | 2010-10-04 Release 2.5.1 | |
56 | ||
57 | * Minor improvements and fixes, no changes to library code. | |
58 | ||
59 | ||
60 | 2010-10-03 Release 2.5.0 | |
61 | ||
62 | * Added bindings of the TRE and GNU regex APIs. | |
63 | ||
64 | * Improved and simplified the build system. | |
65 | ||
66 | ||
67 | 2008-08-04 Release 2.4.0 | |
68 | ||
69 | * All functions receiving string-type regex accept a compiled regex too. | |
70 | ||
71 | * Added binding of the Oniguruma library. | |
72 | ||
73 | ||
74 | 2008-05-31 Release 2.3.0 | |
75 | ||
76 | * Added methods: find and match. | |
77 | ||
78 | ||
79 | 2007-12-29 Release 2.2.2 | |
80 | ||
81 | * Makefiles fixed. | |
82 | ||
83 | ||
84 | 2007-10-25 Release 2.2.1 | |
85 | ||
86 | * Added new PCRE constants up to version 7.4. | |
87 | ||
88 | * 1 bugfix. | |
89 | ||
90 | ||
91 | 2007-06-19 Release 2.2.0 | |
92 | ||
93 | * gsub API extension: allow the 4-th argument to be a function. | |
94 | ||
95 | * Added functions: maketables (PCRE). | |
96 | ||
97 | * Improved algorithm for global searches (PCRE; retry after empty match). | |
98 | ||
99 | * cflags may be specified by a string (PCRE). | |
100 | ||
101 | * Bugfixes. | |
102 | ||
103 | ||
104 | 2007-02-13 Release 2.1.0 | |
105 | ||
106 | * Improved error handling. | |
107 | ||
108 | * A minor change in API. | |
109 | ||
110 | ||
111 | 2007-01-30 Release 2.0.2 | |
112 | ||
113 | * 3 bugfixes. | |
114 | ||
115 | * Corrections in the Reference Manual. | |
116 | ||
117 | ||
118 | 2007-01-12 Release 2.0.1 | |
119 | ||
120 | * Bugfix in memory deallocation. | |
121 | ||
122 | ||
123 | 2007-01-05 Release 2.0 | |
124 | ||
125 | * Added functions: match, find, gmatch, gsub, split, config, plainfind. | |
126 | ||
127 | * Added methods: dfa_exec. | |
128 | ||
129 | * Removed methods: gmatch. | |
130 | ||
131 | * Renamed functions: | |
132 | newPCRE, newPOSIX --> new. | |
133 | flagsPCRE, flagsPOSIX --> flags. | |
134 | versionPCRE --> version. | |
135 | ||
136 | * Renamed methods: match --> tfind. | |
137 | ||
138 | * Added test suite. | |
139 | ||
140 | * Added reference manual. | |
141 | ||
142 | ||
143 | 2004-12-19 Release 1.19 | |
144 | ||
145 | * Added support for PCRE "named subpatterns" (thanks to Nick Gammon). | |
146 | ||
147 | * Several minor improvements. | |
148 | ||
149 | ||
150 | 2004-08-25 Release 1.18 | |
151 | ||
152 | * New lua function 'r:exec'. | |
153 | ||
154 | * New lua function 'r:__tostring'. | |
155 | ||
156 | * A table returned by r:match() as its 3rd result has no "n" index | |
157 | set anymore. Use table.getn instead. | |
158 | ||
159 | * Fixed the bug preventing compilation with the "basic" POSIX | |
160 | regexp library. | |
161 | ||
162 | * Makefile improved. | |
163 | ||
164 | * Added file gsub.lua containing function 'generic_gsub'. | |
165 | ||
166 | ||
167 | 2004-07-16 Release 17 | |
168 | ||
169 | * New lua functions 'flagsPOSIX', 'flagsPCRE', 'versionPCRE'. | |
170 | ||
171 | * Lua functions 'newPCRE' and 'newPOSIX' accept an optional | |
172 | 2nd argument ("compilation flags"). | |
173 | ||
174 | * Lua function 'newPCRE' accepts an optional 3rd argument | |
175 | ("locale"). | |
176 | ||
177 | * Lua function 'r:match' accepts optional 2nd and 3rd | |
178 | arguments ("startoffset" and "execution flags"). | |
179 | ||
180 | * Lua function 'r:gmatch' accepts an optional 2nd argument | |
181 | ("execution flags"). | |
182 | ||
183 | * If a user-defined function passed to r:gmatch() as its | |
184 | 2nd parameter returns true value, then r:gmatch() returns. | |
185 | ||
186 | * The table of substring matches contains false in the positions | |
187 | correspondent to non-matched subpatterns. | |
188 |
0 | Lrexlib | |
1 | ======= | |
2 | ||
3 | | by Reuben Thomas (rrt@sc3d.org) | |
4 | | and Shmuel Zeigerman (shmuz@013net.net) | |
5 | ||
6 | **Lrexlib** provides bindings of six regular expression library APIs | |
7 | (POSIX_, PCRE_, PCRE2_, GNU_, TRE_ and Oniguruma_) to Lua_ >= 5.1. | |
8 | The bindings for TRE and Oniguruma are not currently complete. | |
9 | ||
10 | **Lrexlib** is copyright Reuben Thomas 2000-2020 and copyright Shmuel | |
11 | Zeigerman 2004-2020, and is released under the same license as Lua, | |
12 | the MIT_ license (otherwise known as the revised BSD license). There | |
13 | is no warranty. | |
14 | ||
15 | .. _POSIX: http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html | |
16 | .. _PCRE: http://www.pcre.org/pcre.txt | |
17 | .. _PCRE2: http://www.pcre.org/pcre2.txt | |
18 | .. _GNU: ftp://ftp.gnu.org/old-gnu/regex/ | |
19 | .. _Oniguruma: https://github.com/kkos/oniguruma | |
20 | .. _TRE: http://laurikari.net/tre/documentation/ | |
21 | .. _Lua: http://www.lua.org | |
22 | .. _MIT: http://www.opensource.org/licenses/mit-license.php | |
23 | ||
24 | Please report bugs and make suggestions to the maintainer, or use the | |
25 | LuaForge trackers and mailing lists. | |
26 | ||
27 | Thanks to Thatcher Ulrich for bug and warning fixes, and to Nick | |
28 | Gammon for adding support for PCRE named subpatterns. | |
29 | ||
30 | ----------------------------------------------------------- | |
31 | ||
32 | Installation | |
33 | ------------ | |
34 | ||
35 | Lrexlib is installed with LuaRocks_, using the command:: | |
36 | ||
37 | luarocks install lrexlib-FLAVOUR | |
38 | ||
39 | where **FLAVOUR** is one of PCRE, PCRE2, POSIX, oniguruma, TRE or GNU. | |
40 | ||
41 | .. _LuaRocks: http://www.luarocks.org | |
42 | ||
43 | ||
44 | Links | |
45 | ----- | |
46 | ||
47 | - License_ | |
48 | - `Reference Manual`_ | |
49 | - `LuaForge Project Page`_ | |
50 | - Download_ | |
51 | ||
52 | .. _License: http://rrthomas.github.com/lrexlib/license.html | |
53 | .. _Reference Manual: http://rrthomas.github.com/lrexlib/manual.html | |
54 | .. _LuaForge Project Page: http://luaforge.net/projects/lrexlib/ | |
55 | .. _Download: https://github.com/rrthomas/lrexlib/downloads |
0 | /* | |
1 | :Author: David Goodger | |
2 | :Contact: goodger@users.sourceforge.net | |
3 | :Date: $Date: 2006/01/21 03:17:49 $ | |
4 | :Revision: $Revision: 1.1 $ | |
5 | :Copyright: This stylesheet has been placed in the public domain. | |
6 | ||
7 | Default cascading style sheet for the HTML output of Docutils. | |
8 | ||
9 | See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to | |
10 | customize this style sheet. | |
11 | */ | |
12 | ||
13 | /* used to remove borders from tables and images */ | |
14 | .borderless, table.borderless td, table.borderless th { | |
15 | border: 0 } | |
16 | ||
17 | table.borderless td, table.borderless th { | |
18 | /* Override padding for "table.docutils td" with "! important". | |
19 | The right padding separates the table cells. */ | |
20 | padding: 0 0.5em 0 0 ! important } | |
21 | ||
22 | .first { | |
23 | /* Override more specific margin styles with "! important". */ | |
24 | margin-top: 0 ! important } | |
25 | ||
26 | .last, .with-subtitle { | |
27 | margin-bottom: 0 ! important } | |
28 | ||
29 | .hidden { | |
30 | display: none } | |
31 | ||
32 | a.toc-backref { | |
33 | text-decoration: none ; | |
34 | color: black } | |
35 | ||
36 | blockquote.epigraph { | |
37 | margin: 2em 5em ; } | |
38 | ||
39 | dl.docutils dd { | |
40 | margin-bottom: 0.5em } | |
41 | ||
42 | /* Uncomment (and remove this text!) to get bold-faced definition list terms | |
43 | dl.docutils dt { | |
44 | font-weight: bold } | |
45 | */ | |
46 | ||
47 | div.abstract { | |
48 | margin: 2em 5em } | |
49 | ||
50 | div.abstract p.topic-title { | |
51 | font-weight: bold ; | |
52 | text-align: center } | |
53 | ||
54 | div.admonition, div.attention, div.caution, div.danger, div.error, | |
55 | div.hint, div.important, div.note, div.tip, div.warning { | |
56 | margin: 2em ; | |
57 | border: medium outset ; | |
58 | padding: 1em } | |
59 | ||
60 | div.admonition p.admonition-title, div.hint p.admonition-title, | |
61 | div.important p.admonition-title, div.note p.admonition-title, | |
62 | div.tip p.admonition-title { | |
63 | font-weight: bold ; | |
64 | font-family: sans-serif } | |
65 | ||
66 | div.attention p.admonition-title, div.caution p.admonition-title, | |
67 | div.danger p.admonition-title, div.error p.admonition-title, | |
68 | div.warning p.admonition-title { | |
69 | color: red ; | |
70 | font-weight: bold ; | |
71 | font-family: sans-serif } | |
72 | ||
73 | /* Uncomment (and remove this text!) to get reduced vertical space in | |
74 | compound paragraphs. | |
75 | div.compound .compound-first, div.compound .compound-middle { | |
76 | margin-bottom: 0.5em } | |
77 | ||
78 | div.compound .compound-last, div.compound .compound-middle { | |
79 | margin-top: 0.5em } | |
80 | */ | |
81 | ||
82 | div.dedication { | |
83 | margin: 2em 5em ; | |
84 | text-align: center ; | |
85 | font-style: italic } | |
86 | ||
87 | div.dedication p.topic-title { | |
88 | font-weight: bold ; | |
89 | font-style: normal } | |
90 | ||
91 | div.figure { | |
92 | margin-left: 2em ; | |
93 | margin-right: 2em } | |
94 | ||
95 | div.footer, div.header { | |
96 | clear: both; | |
97 | font-size: smaller } | |
98 | ||
99 | div.line-block { | |
100 | display: block ; | |
101 | margin-top: 1em ; | |
102 | margin-bottom: 1em } | |
103 | ||
104 | div.line-block div.line-block { | |
105 | margin-top: 0 ; | |
106 | margin-bottom: 0 ; | |
107 | margin-left: 1.5em } | |
108 | ||
109 | div.sidebar { | |
110 | margin-left: 1em ; | |
111 | border: medium outset ; | |
112 | padding: 1em ; | |
113 | background-color: #ffffee ; | |
114 | width: 40% ; | |
115 | float: right ; | |
116 | clear: right } | |
117 | ||
118 | div.sidebar p.rubric { | |
119 | font-family: sans-serif ; | |
120 | font-size: medium } | |
121 | ||
122 | div.system-messages { | |
123 | margin: 5em } | |
124 | ||
125 | div.system-messages h1 { | |
126 | color: red } | |
127 | ||
128 | div.system-message { | |
129 | border: medium outset ; | |
130 | padding: 1em } | |
131 | ||
132 | div.system-message p.system-message-title { | |
133 | color: red ; | |
134 | font-weight: bold } | |
135 | ||
136 | div.topic { | |
137 | margin: 2em } | |
138 | ||
139 | h1.section-subtitle, h2.section-subtitle, h3.section-subtitle, | |
140 | h4.section-subtitle, h5.section-subtitle, h6.section-subtitle { | |
141 | margin-top: 0.4em } | |
142 | ||
143 | h1.title { | |
144 | text-align: center } | |
145 | ||
146 | h2.subtitle { | |
147 | text-align: center } | |
148 | ||
149 | hr.docutils { | |
150 | width: 75% } | |
151 | ||
152 | img.align-left { | |
153 | clear: left } | |
154 | ||
155 | img.align-right { | |
156 | clear: right } | |
157 | ||
158 | ol.simple, ul.simple { | |
159 | margin-bottom: 1em } | |
160 | ||
161 | ol.arabic { | |
162 | list-style: decimal } | |
163 | ||
164 | ol.loweralpha { | |
165 | list-style: lower-alpha } | |
166 | ||
167 | ol.upperalpha { | |
168 | list-style: upper-alpha } | |
169 | ||
170 | ol.lowerroman { | |
171 | list-style: lower-roman } | |
172 | ||
173 | ol.upperroman { | |
174 | list-style: upper-roman } | |
175 | ||
176 | p.attribution { | |
177 | text-align: right ; | |
178 | margin-left: 50% } | |
179 | ||
180 | p.caption { | |
181 | font-style: italic } | |
182 | ||
183 | p.credits { | |
184 | font-style: italic ; | |
185 | font-size: smaller } | |
186 | ||
187 | p.label { | |
188 | white-space: nowrap } | |
189 | ||
190 | p.rubric { | |
191 | font-weight: bold ; | |
192 | font-size: larger ; | |
193 | color: maroon ; | |
194 | text-align: center } | |
195 | ||
196 | p.sidebar-title { | |
197 | font-family: sans-serif ; | |
198 | font-weight: bold ; | |
199 | font-size: larger } | |
200 | ||
201 | p.sidebar-subtitle { | |
202 | font-family: sans-serif ; | |
203 | font-weight: bold } | |
204 | ||
205 | p.topic-title { | |
206 | font-weight: bold } | |
207 | ||
208 | pre.address { | |
209 | margin-bottom: 0 ; | |
210 | margin-top: 0 ; | |
211 | font-family: serif ; | |
212 | font-size: 100% } | |
213 | ||
214 | pre.literal-block, pre.doctest-block { | |
215 | margin-left: 2em ; | |
216 | margin-right: 2em ; | |
217 | background-color: #eeeeee } | |
218 | ||
219 | span.classifier { | |
220 | font-family: sans-serif ; | |
221 | font-style: oblique } | |
222 | ||
223 | span.classifier-delimiter { | |
224 | font-family: sans-serif ; | |
225 | font-weight: bold } | |
226 | ||
227 | span.interpreted { | |
228 | font-family: sans-serif } | |
229 | ||
230 | span.option { | |
231 | white-space: nowrap } | |
232 | ||
233 | span.pre { | |
234 | white-space: pre } | |
235 | ||
236 | span.problematic { | |
237 | color: red } | |
238 | ||
239 | span.section-subtitle { | |
240 | /* font-size relative to parent (h1..h6 element) */ | |
241 | font-size: 80% } | |
242 | ||
243 | table.citation { | |
244 | border-left: solid 1px gray; | |
245 | margin-left: 1px } | |
246 | ||
247 | table.docinfo { | |
248 | margin: 2em 4em } | |
249 | ||
250 | table.docutils { | |
251 | margin-top: 0.5em ; | |
252 | margin-bottom: 0.5em } | |
253 | ||
254 | table.footnote { | |
255 | border-left: solid 1px black; | |
256 | margin-left: 1px } | |
257 | ||
258 | table.docutils td, table.docutils th, | |
259 | table.docinfo td, table.docinfo th { | |
260 | padding-left: 0.5em ; | |
261 | padding-right: 0.5em ; | |
262 | vertical-align: top } | |
263 | ||
264 | table.docutils th.field-name, table.docinfo th.docinfo-name { | |
265 | font-weight: bold ; | |
266 | text-align: left ; | |
267 | white-space: nowrap ; | |
268 | padding-left: 0 } | |
269 | ||
270 | h1 tt.docutils, h2 tt.docutils, h3 tt.docutils, | |
271 | h4 tt.docutils, h5 tt.docutils, h6 tt.docutils { | |
272 | font-size: 100% } | |
273 | ||
274 | tt.docutils { | |
275 | background-color: #eeeeee } | |
276 | ||
277 | ul.auto-toc { | |
278 | list-style-type: none } |
0 | <!DOCTYPE HTML SYSTEM> | |
1 | <html> | |
2 | <head> | |
3 | <title>Lrexlib: The License</title> | |
4 | </head> | |
5 | <body> | |
6 | ||
7 | <h2>Lrexlib</h2> | |
8 | <p>Copyright © Reuben Thomas 2000-2020<br> | |
9 | Copyright © Shmuel Zeigerman 2004-2020 | |
10 | ||
11 | <p>Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: | |
12 | ||
13 | <p>The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. | |
14 | ||
15 | <p>THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
16 | ||
17 | </body> | |
18 | </html> | |
19 |
0 | /* | |
1 | :Author: Shmuel Zeigerman | |
2 | :Contact: shmuz at actcom co il | |
3 | :Copyright: This stylesheet has been placed in the public domain. | |
4 | ||
5 | [Optionally place a description here.] | |
6 | */ | |
7 | ||
8 | @import url(html4css1.css); | |
9 | ||
10 | hr.docutils { | |
11 | width: 100% | |
12 | } | |
13 | ||
14 | .funcdef { | |
15 | font-weight: bold ; | |
16 | font-size: 100% | |
17 | } | |
18 | ||
19 | body { | |
20 | margin-left: 1em ; | |
21 | margin-right: 1em ; | |
22 | font-family: "Verdana", sans-serif ; | |
23 | } | |
24 | ||
25 | table { | |
26 | border-spacing: 0; | |
27 | } | |
28 | ||
29 | tr { | |
30 | margin: 0; | |
31 | padding: 0; | |
32 | } |
0 | .. role:: funcdef(literal) | |
1 | ||
2 | Lrexlib Reference Manual | |
3 | ======================== | |
4 | ||
5 | .. contents:: Table of Contents | |
6 | ||
7 | ------------------------------------------------------------ | |
8 | ||
9 | **Lrexlib** builds into shared libraries called by default *rex_posix.so*, | |
10 | *rex_pcre.so*, *rex_pcre2.so*, *rex_gnu.so*, *rex_tre.so* and *rex_onig.so*, | |
11 | which can be used with *require*. | |
12 | ||
13 | ------------------------------------------------------------ | |
14 | ||
15 | Notes | |
16 | ~~~~~ | |
17 | ||
18 | 1. Most functions and methods in Lrexlib have mandatory and optional arguments. | |
19 | There are no dependencies between arguments in Lrexlib's functions and | |
20 | methods. Any optional argument can be supplied as ``nil`` (or omitted if it | |
21 | is a trailing argument); the library will then use the default value for that | |
22 | argument. | |
23 | ||
24 | 2. This document uses the following syntax for optional arguments: they are | |
25 | bracketed separately, and commas are left outside brackets, e.g.:: | |
26 | ||
27 | MyFunc (arg1, arg2, [arg3], [arg4]) | |
28 | ||
29 | 3. Throughout this document (unless it causes ambiguity), the identifier **rex** | |
30 | is used in place of either *rex_posix*, *rex_pcre*, *rex_pcre2*, *rex_gnu*, | |
31 | *rex_onig* or *rex_tre*, which are the default namespaces for the corresponding | |
32 | libraries. | |
33 | ||
34 | 4. All functions that take a regular expression pattern as an argument will | |
35 | generate an error if that pattern is found invalid by the regex library. | |
36 | ||
37 | 5. All functions that take a string-type regex argument accept a compiled regex | |
38 | too. In this case, the cf_ and larg_ arguments are ignored (should | |
39 | be either supplied as nils or omitted). | |
40 | ||
41 | 6. All functions that take a string-type subject accept a table or userdata that | |
42 | has a ``topointer`` method and ``__len`` metamethod, and take the subject to | |
43 | be a block of memory starting at the address returned by | |
44 | ``subject:topointer()`` and of length ``#subject``. This works with buffer | |
45 | objects from the alien library (https://github.com/mascarenhas/alien). Note | |
46 | that special attention is needed with POSIX regex libraries that do not | |
47 | support ``REG_STARTEND``, and hence need NUL-terminated subjects: the NUL is | |
48 | not included in the string length, so alien buffers must be wrapped to report | |
49 | a length that excludes the NUL. | |
50 | ||
51 | .. _cf: | |
52 | ||
53 | 7. The default value for *compilation flags* (*cf*) that Lrexlib uses when | |
54 | the parameter is not supplied or ``nil`` is: | |
55 | ||
56 | * ``REG_EXTENDED`` for POSIX and TRE | |
57 | * ``0`` for PCRE and PCRE2 | |
58 | * ``ONIG_OPTION_NONE`` for Oniguruma | |
59 | * ``SYNTAX_POSIX_EXTENDED`` for GNU | |
60 | ||
61 | **PCRE**, **PCRE2**, **Oniguruma**: *cf* may also be supplied as a string, | |
62 | whose characters stand for compilation flags. Combinations of the following | |
63 | characters (case sensitive) are supported: | |
64 | ||
65 | =============== ================== ================== ============================== | |
66 | **Character** **PCRE flag** **PCRE2 flag** **Oniguruma flag** | |
67 | =============== ================== ================== ============================== | |
68 | **i** PCRE_CASELESS PCRE2_CASELESS ONIG_OPTION_IGNORECASE | |
69 | **m** PCRE_MULTILINE PCRE2_MULTILINE ONIG_OPTION_NEGATE_SINGLELINE | |
70 | **s** PCRE_DOTALL PCRE2_DOTALL ONIG_OPTION_MULTILINE | |
71 | **x** PCRE_EXTENDED PCRE2_EXTENDED ONIG_OPTION_EXTEND | |
72 | **U** PCRE_UNGREEDY PCRE2_UNGREEDY n/a | |
73 | **X** PCRE_EXTRA n/a n/a | |
74 | =============== ================== ================== ============================== | |
75 | ||
76 | .. _ef: | |
77 | ||
78 | 8. The default value for *execution flags* (*ef*) that Lrexlib uses when | |
79 | the parameter is not supplied or ``nil`` is: | |
80 | ||
81 | * ``0`` for standard POSIX regex library | |
82 | * ``REG_STARTEND`` for those POSIX regex libraries that support it, e.g. Spencer's | |
83 | * ``0`` for PCRE, PCRE2, Oniguruma and TRE | |
84 | ||
85 | .. _larg: | |
86 | ||
87 | 9. The notation *larg...* is used to indicate optional library-specific | |
88 | arguments, which are documented in the ``new`` method of each library. | |
89 | ||
90 | 10. In the functions searching for multiple matches (``gmatch``, ``gsub``, | |
91 | ``split``, ``count``) every empty match adjacent to the previous match | |
92 | is discarded, e.g. ``rex.count("abc",".*")`` will return 1. | |
93 | ||
94 | ------------------------------------------------------------ | |
95 | ||
96 | Functions and methods common to all bindings | |
97 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
98 | ||
99 | match | |
100 | ----- | |
101 | ||
102 | :funcdef:`rex.match (subj, patt, [init], [cf], [ef], [larg...])` | |
103 | ||
104 | or | |
105 | ||
106 | :funcdef:`r:match (subj, [init], [ef])` | |
107 | ||
108 | The function searches for the first match of the regexp *patt* in the string | |
109 | *subj*, starting from offset *init*, subject to flags *cf* and *ef*. | |
110 | ||
111 | +---------+-------------------------------+--------+-------------+ | |
112 | |Parameter| Description | Type |Default Value| | |
113 | +=========+===============================+========+=============+ | |
114 | | r |regex object produced by new |userdata| n/a | | |
115 | +---------+-------------------------------+--------+-------------+ | |
116 | | subj | subject | string | n/a | | |
117 | +---------+-------------------------------+--------+-------------+ | |
118 | | patt |regular expression pattern |string | n/a | | |
119 | | | |or | | | |
120 | | | |userdata| | | |
121 | +---------+-------------------------------+--------+-------------+ | |
122 | | [init] |start offset in the subject | number | 1 | | |
123 | | |(can be negative) | | | | |
124 | +---------+-------------------------------+--------+-------------+ | |
125 | | [cf] |compilation flags (bitwise OR) | number | cf_ | | |
126 | +---------+-------------------------------+--------+-------------+ | |
127 | | [ef] |execution flags (bitwise OR) | number | ef_ | | |
128 | +---------+-------------------------------+--------+-------------+ | |
129 | |[larg...]|library-specific arguments | | | | |
130 | +---------+-------------------------------+--------+-------------+ | |
131 | ||
132 | **Returns on success:** | |
133 | 1. All substring matches ("captures"), in the order they appear in the | |
134 | pattern. ``false`` is returned for sub-patterns that did not participate in | |
135 | the match. If the pattern specified no captures then the whole matched | |
136 | substring is returned. | |
137 | ||
138 | **Returns on failure:** | |
139 | 1. ``nil`` | |
140 | ||
141 | ------------------------------------------------------------ | |
142 | ||
143 | find | |
144 | ---- | |
145 | ||
146 | :funcdef:`rex.find (subj, patt, [init], [cf], [ef], [larg...])` | |
147 | ||
148 | or | |
149 | ||
150 | :funcdef:`r:find (subj, [init], [ef])` | |
151 | ||
152 | The function searches for the first match of the regexp *patt* in the string | |
153 | *subj*, starting from offset *init*, subject to flags *cf* and *ef*. | |
154 | ||
155 | +---------+-------------------------------+--------+-------------+ | |
156 | |Parameter| Description | Type |Default Value| | |
157 | +=========+===============================+========+=============+ | |
158 | | r |regex object produced by new |userdata| n/a | | |
159 | +---------+-------------------------------+--------+-------------+ | |
160 | | subj |subject | string | n/a | | |
161 | +---------+-------------------------------+--------+-------------+ | |
162 | | patt |regular expression pattern |string | n/a | | |
163 | | | |or | | | |
164 | | | |userdata| | | |
165 | +---------+-------------------------------+--------+-------------+ | |
166 | | [init] |start offset in the subject | number | 1 | | |
167 | | |(can be negative) | | | | |
168 | +---------+-------------------------------+--------+-------------+ | |
169 | | [cf] |compilation flags (bitwise OR) | number | cf_ | | |
170 | +---------+-------------------------------+--------+-------------+ | |
171 | | [ef] |execution flags (bitwise OR) | number | ef_ | | |
172 | +---------+-------------------------------+--------+-------------+ | |
173 | |[larg...]|library-specific arguments | | | | |
174 | +---------+-------------------------------+--------+-------------+ | |
175 | ||
176 | **Returns on success:** | |
177 | 1. The start point of the match (a number). | |
178 | 2. The end point of the match (a number). | |
179 | 3. All substring matches ("captures"), in the order they appear in the | |
180 | pattern. ``false`` is returned for sub-patterns that did not participate in | |
181 | the match. | |
182 | ||
183 | **Returns on failure:** | |
184 | 1. ``nil`` | |
185 | ||
186 | ------------------------------------------------------------ | |
187 | ||
188 | gmatch | |
189 | ------ | |
190 | ||
191 | :funcdef:`rex.gmatch (subj, patt, [cf], [ef], [larg...])` | |
192 | ||
193 | The function is intended for use in the *generic for* Lua construct. | |
194 | It returns an iterator for repeated matching of the pattern *patt* in | |
195 | the string *subj*, subject to flags *cf* and *ef*. | |
196 | ||
197 | +---------+-------------------------------+--------+-------------+ | |
198 | |Parameter| Description | Type |Default Value| | |
199 | +=========+===============================+========+=============+ | |
200 | | subj |subject |string | n/a | | |
201 | +---------+-------------------------------+--------+-------------+ | |
202 | | patt |regular expression pattern |string | n/a | | |
203 | | | |or | | | |
204 | | | |userdata| | | |
205 | +---------+-------------------------------+--------+-------------+ | |
206 | | [cf] |compilation flags (bitwise OR) |number | cf_ | | |
207 | +---------+-------------------------------+--------+-------------+ | |
208 | | [ef] |execution flags (bitwise OR) |number | ef_ | | |
209 | +---------+-------------------------------+--------+-------------+ | |
210 | |[larg...]|library-specific arguments | | | | |
211 | +---------+-------------------------------+--------+-------------+ | |
212 | ||
213 | The iterator function is called by Lua. On every iteration (that is, on every | |
214 | match), it returns all captures in the order they appear in the pattern (or the | |
215 | entire match if the pattern specified no captures). The iteration will continue | |
216 | till the subject fails to match. | |
217 | ||
218 | ------------------------------------------------------------ | |
219 | ||
220 | gsub | |
221 | ---- | |
222 | ||
223 | :funcdef:`rex.gsub (subj, patt, repl, [n], [cf], [ef], [larg...])` | |
224 | ||
225 | This function searches for all matches of the pattern *patt* in the string | |
226 | *subj* and replaces them according to the parameters *repl* and *n* (see details | |
227 | below). | |
228 | ||
229 | +---------+-----------------------------------+--------------------------+-------------+ | |
230 | |Parameter| Description | Type |Default Value| | |
231 | +=========+===================================+==========================+=============+ | |
232 | | subj |subject | string | n/a | | |
233 | +---------+-----------------------------------+--------------------------+-------------+ | |
234 | | patt |regular expression pattern |string or userdata | n/a | | |
235 | +---------+-----------------------------------+--------------------------+-------------+ | |
236 | | repl |substitution source |string, function or table | n/a | | |
237 | +---------+-----------------------------------+--------------------------+-------------+ | |
238 | | [n] |maximum number of matches to search| number or function | ``nil`` | | |
239 | | |for, or control function, or nil | | | | |
240 | +---------+-----------------------------------+--------------------------+-------------+ | |
241 | | [cf] |compilation flags (bitwise OR) | number | cf_ | | |
242 | +---------+-----------------------------------+--------------------------+-------------+ | |
243 | | [ef] |execution flags (bitwise OR) | number | ef_ | | |
244 | +---------+-----------------------------------+--------------------------+-------------+ | |
245 | |[larg...]|library-specific arguments | | | | |
246 | +---------+-----------------------------------+--------------------------+-------------+ | |
247 | ||
248 | **Returns:** | |
249 | 1. The subject string with the substitutions made. | |
250 | 2. Number of matches found. | |
251 | 3. Number of substitutions made. | |
252 | ||
253 | **Details:** | |
254 | The parameter *repl* can be either a string, a function or a table. | |
255 | On each match made, it is converted into a value *repl_out* that may be used | |
256 | for the replacement. | |
257 | ||
258 | *repl_out* is generated differently depending on the type of *repl*: | |
259 | ||
260 | 1. If *repl* is a *string* then it is treated as a template for substitution, | |
261 | where the %X occurrences in *repl* are handled in a special way, depending | |
262 | on the value of the character X: | |
263 | ||
264 | * if X represents a digit, then each %X occurrence is substituted by the | |
265 | value of the X-th submatch (capture), with the following cases handled | |
266 | specially: | |
267 | ||
268 | * each %0 is substituted by the entire match | |
269 | * if the pattern contains no captures, then each %1 is substituted by the | |
270 | entire match | |
271 | * any other %X where X is greater than the number of captures in the | |
272 | pattern will generate an error ("invalid capture index") | |
273 | * if the pattern does contain a capture with number X but that capture | |
274 | didn't participate in the match, then %X is substituted by an empty | |
275 | string | |
276 | ||
277 | * if X is any non-digit character then %X is substituted by X | |
278 | ||
279 | All parts of *repl* other than %X are copied to *repl_out* verbatim. | |
280 | ||
281 | 2. If *repl* is a *function* then it is called on each match with the | |
282 | submatches passed as parameters (if there are no submatches then the entire | |
283 | match is passed as the only parameter). *repl_out* is the return value of | |
284 | the *repl* call, and is interpreted as follows: | |
285 | ||
286 | * if it is a string or a number (coerced to a string), then the replacement | |
287 | value is that string; | |
288 | * if it is a ``nil`` or a ``false``, then no replacement is to be done; | |
289 | ||
290 | 3. If *repl* is a table then *repl_out* is *repl* [m1], where m1 is the first | |
291 | submatch (or the entire match if there are no submatches), following the | |
292 | same rules as for the return value of *repl* call, described in the above | |
293 | paragraph. | |
294 | ||
295 | Note: Under some circumstances, the value of *repl_out* may be ignored; see | |
296 | below_. | |
297 | ||
298 | gsub behaves differently depending on the type of *n*: | |
299 | ||
300 | 1. If *n* is a *number* then it is treated as the maximum number of matches | |
301 | to search for (an omitted or ``nil`` value means an unlimited number of | |
302 | matches). On each match, the replacement value is the *repl_out* string | |
303 | (see above). | |
304 | ||
305 | .. _below: | |
306 | ||
307 | 2. If *n* is a function, then it is called on each match, after *repl_out* is | |
308 | produced (so if *repl* is a function, it will be called prior to the *n* | |
309 | call). | |
310 | ||
311 | *n* receives 3 arguments and returns 2 values. Its arguments are: | |
312 | ||
313 | 1. The start offset of the match (a number) | |
314 | 2. The end offset of the match (a number) | |
315 | 3. *repl_out* | |
316 | ||
317 | The type of its first return controls the replacement produced by gsub for | |
318 | the current match: | |
319 | ||
320 | * ``true`` -- replace/don't replace, according to *repl_out*; | |
321 | * ``nil``/``false`` -- don't replace; | |
322 | * a string (or a number coerced to a string) -- replace by that string; | |
323 | ||
324 | The type of its second return controls gsub behavior after the current | |
325 | match is handled: | |
326 | ||
327 | * ``nil``/``false`` -- no changes: *n* will be called on the next match; | |
328 | * ``true`` -- search for an unlimited number of matches; *n* will not be | |
329 | called again; | |
330 | * a number -- maximum number of matches to search for, beginning from the | |
331 | next match; *n* will not be called again; | |
332 | ||
333 | ------------------------------------------------------------ | |
334 | ||
335 | split | |
336 | ----- | |
337 | ||
338 | :funcdef:`rex.split (subj, sep, [cf], [ef], [larg...])` | |
339 | ||
340 | The function is intended for use in the *generic for* Lua construct. | |
341 | It is used for splitting a subject string *subj* into parts (*sections*). | |
342 | The *sep* parameter is a regular expression pattern representing | |
343 | **separators** between the sections. | |
344 | ||
345 | The function returns an iterator for repeated matching of the pattern *sep* in | |
346 | the string *subj*, subject to flags *cf* and *ef*. | |
347 | ||
348 | +---------+-------------------------------+--------+-------------+ | |
349 | |Parameter| Description | Type |Default Value| | |
350 | +=========+===============================+========+=============+ | |
351 | | subj |subject |string | n/a | | |
352 | +---------+-------------------------------+--------+-------------+ | |
353 | | sep |separator (regular expression |string | n/a | | |
354 | | |pattern) |or | | | |
355 | | | |userdata| | | |
356 | +---------+-------------------------------+--------+-------------+ | |
357 | | [cf] |compilation flags (bitwise OR) |number | cf_ | | |
358 | +---------+-------------------------------+--------+-------------+ | |
359 | | [ef] |execution flags (bitwise OR) |number | ef_ | | |
360 | +---------+-------------------------------+--------+-------------+ | |
361 | |[larg...]|library-specific arguments | | | | |
362 | +---------+-------------------------------+--------+-------------+ | |
363 | ||
364 | **On every iteration pass, the iterator returns:** | |
365 | ||
366 | 1. A subject section (can be an empty string), followed by | |
367 | 2. All captures in the order they appear in the *sep* pattern (or the entire | |
368 | match if the *sep* pattern specified no captures). If there is no match | |
369 | (this can occur only in the last iteration), then nothing is returned after | |
370 | the subject section. | |
371 | ||
372 | The iteration will continue till the end of the subject. Unlike gmatch_, there | |
373 | will always be at least one iteration pass, even if there are no matches in the | |
374 | subject. | |
375 | ||
376 | ------------------------------------------------------------ | |
377 | ||
378 | count | |
379 | ----- | |
380 | ||
381 | :funcdef:`rex.count (subj, patt, [cf], [ef], [larg...])` | |
382 | ||
383 | This function counts matches of the pattern *patt* in the string *subj*. | |
384 | ||
385 | +---------+-----------------------------------+--------------------------+-------------+ | |
386 | |Parameter| Description | Type |Default Value| | |
387 | +=========+===================================+==========================+=============+ | |
388 | | subj |subject | string | n/a | | |
389 | +---------+-----------------------------------+--------------------------+-------------+ | |
390 | | patt |regular expression pattern |string or userdata | n/a | | |
391 | +---------+-----------------------------------+--------------------------+-------------+ | |
392 | | [cf] |compilation flags (bitwise OR) | number | cf_ | | |
393 | +---------+-----------------------------------+--------------------------+-------------+ | |
394 | | [ef] |execution flags (bitwise OR) | number | ef_ | | |
395 | +---------+-----------------------------------+--------------------------+-------------+ | |
396 | |[larg...]|library-specific arguments | | | | |
397 | +---------+-----------------------------------+--------------------------+-------------+ | |
398 | ||
399 | **Returns:** | |
400 | 1. Number of matches found. | |
401 | ||
402 | ------------------------------------------------------------ | |
403 | ||
404 | flags | |
405 | ----- | |
406 | ||
407 | :funcdef:`rex.flags ([tb])` | |
408 | ||
409 | This function returns a table containing the numeric values of the constants | |
410 | defined by the used regex library, with the keys being the (string) names of the | |
411 | constants. If the table argument *tb* is supplied then it is used as the output | |
412 | table, otherwise a new table is created. | |
413 | ||
414 | The constants contained in the returned table can then be used in most functions | |
415 | and methods where *compilation flags* or *execution flags* can be specified. | |
416 | They can also be used for comparing with return codes of some functions and | |
417 | methods for determining the reason for failure. For details, see the relevant | |
418 | regex library's documentation. | |
419 | ||
420 | +---------+--------------------------------+--------+-------------+ | |
421 | |Parameter| Description | Type |Default Value| | |
422 | +=========+================================+========+=============+ | |
423 | | [tb] |a table for placing results into| table | ``nil`` | | |
424 | +---------+--------------------------------+--------+-------------+ | |
425 | ||
426 | **Returns:** | |
427 | 1. A table filled with the results. | |
428 | ||
429 | **Notes:** | |
430 | The keys in the *tb* table are formed from the names of the corresponding | |
431 | constants in the used library. They are formed as follows: | |
432 | ||
433 | * **POSIX**, **TRE**: prefix REG\_ is omitted, e.g. REG_ICASE becomes ``"ICASE"``. | |
434 | * **PCRE:** prefix PCRE\_ is omitted, e.g. PCRE_CASELESS becomes ``"CASELESS"``. | |
435 | * **PCRE2:** prefix PCRE2\_ is omitted, e.g. PCRE2_CASELESS becomes ``"CASELESS"``. | |
436 | * **Oniguruma:** names of constants are converted to strings with no alteration, | |
437 | but for ONIG_OPTION_xxx constants, alias strings are created additionally, | |
438 | e.g., the value of ONIG_OPTION_IGNORECASE constant becomes accessible via | |
439 | either of two keys: ``"ONIG_OPTION_IGNORECASE"`` and ``"IGNORECASE"``. | |
440 | * **GNU**: the GNU library provides the flags ``not_bol``, which stops a | |
441 | beginning-of-line anchor from matching at the start of a string, ``not_eol``, | |
442 | which stops an end-of-line anchor from matching at the end of a string, and | |
443 | ``backward`` which causes the search to be performed backwards (that is, the | |
444 | pattern is matched from positions starting at the end of the string; however, | |
445 | the matches themselves are still made forwards), as well as the RE_xxx syntax | |
446 | specifiers (as defined in regex.h), omitting the RE\_ prefix. For example, | |
447 | RE_SYNTAX_GREP becomes ``SYNTAX_GREP`` in Lua. | |
448 | ||
449 | ------------------------------------------------------------ | |
450 | ||
451 | new | |
452 | --- | |
453 | ||
454 | :funcdef:`rex.new (patt, [cf], [larg...])` | |
455 | ||
456 | The function compiles regular expression *patt* into a regular expression object | |
457 | whose internal representation corresponds to the library used. The returned | |
458 | result then can be used by the methods, e.g. `tfind`_, `exec`_, etc. Regular | |
459 | expression objects are automatically garbage collected. See the library-specific | |
460 | documentation below for details of the library-specific arguments *larg...*, if | |
461 | any. | |
462 | ||
463 | +---------+-------------------------------+--------+-------------+ | |
464 | |Parameter| Description | Type |Default Value| | |
465 | +=========+===============================+========+=============+ | |
466 | | patt |regular expression pattern | string | n/a | | |
467 | +---------+-------------------------------+--------+-------------+ | |
468 | | [cf] |compilation flags (bitwise OR) | number | cf_ | | |
469 | +---------+-------------------------------+--------+-------------+ | |
470 | |[larg...]|library-specific arguments | | | | |
471 | +---------+-------------------------------+--------+-------------+ | |
472 | ||
473 | **Returns:** | |
474 | 1. Compiled regular expression (a userdata). | |
475 | ||
476 | ------------------------------------------------------------ | |
477 | ||
478 | tfind | |
479 | ----- | |
480 | ||
481 | :funcdef:`r:tfind (subj, [init], [ef])` | |
482 | ||
483 | The method searches for the first match of the compiled regexp *r* in the | |
484 | string *subj*, starting from offset *init*, subject to execution flags *ef*. | |
485 | ||
486 | +---------+-----------------------------------+--------+-------------+ | |
487 | |Parameter| Description | Type |Default Value| | |
488 | +=========+===================================+========+=============+ | |
489 | | r |regex object produced by new |userdata| n/a | | |
490 | +---------+-----------------------------------+--------+-------------+ | |
491 | | subj |subject | string | n/a | | |
492 | +---------+-----------------------------------+--------+-------------+ | |
493 | | [init] |start offset in the subject | number | 1 | | |
494 | | |(can be negative) | | | | |
495 | +---------+-----------------------------------+--------+-------------+ | |
496 | | [ef] |execution flags (bitwise OR) | number | ef_ | | |
497 | +---------+-----------------------------------+--------+-------------+ | |
498 | ||
499 | **Returns on success:** | |
500 | 1. The start point of the match (a number). | |
501 | 2. The end point of the match (a number). | |
502 | 3. Substring matches ("captures" in Lua terminology) are returned as a third | |
503 | result, in a table. This table contains ``false`` in the positions where the | |
504 | corresponding sub-pattern did not participate in the match. | |
505 | ||
506 | 1. **PCRE**, **PCRE2**, **Oniguruma**: if *named subpatterns* are used then | |
507 | the table also contains substring matches keyed by their corresponding | |
508 | subpattern names (strings). | |
509 | ||
510 | **Returns on failure:** | |
511 | 1. ``nil`` | |
512 | ||
513 | ------------------------------------------------------------ | |
514 | ||
515 | exec | |
516 | ---- | |
517 | ||
518 | :funcdef:`r:exec (subj, [init], [ef])` | |
519 | ||
520 | The method searches for the first match of the compiled regexp *r* in the | |
521 | string *subj*, starting from offset *init*, subject to execution flags *ef*. | |
522 | ||
523 | +---------+-----------------------------------+--------+-------------+ | |
524 | |Parameter| Description | Type |Default Value| | |
525 | +=========+===================================+========+=============+ | |
526 | | r |regex object produced by new |userdata| n/a | | |
527 | +---------+-----------------------------------+--------+-------------+ | |
528 | | subj |subject | string | n/a | | |
529 | +---------+-----------------------------------+--------+-------------+ | |
530 | | [init] |start offset in the subject | number | 1 | | |
531 | | |(can be negative) | | | | |
532 | +---------+-----------------------------------+--------+-------------+ | |
533 | | [ef] |execution flags (bitwise OR) | number | ef_ | | |
534 | +---------+-----------------------------------+--------+-------------+ | |
535 | ||
536 | **Returns on success:** | |
537 | 1. The start point of the first match (a number). | |
538 | 2. The end point of the first match (a number). | |
539 | 3. The offsets of substring matches ("captures" in Lua terminology) are | |
540 | returned as a third result, in a table. This table contains ``false`` in the | |
541 | positions where the corresponding sub-pattern did not participate in the | |
542 | match. | |
543 | ||
544 | 1. **PCRE**, **PCRE2**, **Oniguruma**: if *named subpatterns* are used then | |
545 | the table also contains substring matches keyed by their corresponding | |
546 | subpattern names (strings). | |
547 | ||
548 | **Returns on failure:** | |
549 | 1. ``nil`` | |
550 | ||
551 | **Example:** | |
552 | If the whole match is at offsets 10,20 and substring matches are at offsets | |
553 | 12,14 and 16,19 then the function returns the following: 10, 20, | |
554 | { 12,14,16,19 }. | |
555 | ||
556 | ------------------------------------------------------------ | |
557 | ||
558 | PCRE-only functions and methods | |
559 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
560 | ||
561 | new | |
562 | --- | |
563 | ||
564 | :funcdef:`rex.new (patt, [cf], [lo])` | |
565 | ||
566 | The locale (*lo*) can be either a string (e.g., "French_France.1252"), or a | |
567 | userdata obtained from a call to maketables__. The default value, used when | |
568 | the parameter is not supplied or ``nil``, is the built-in PCRE set of character | |
569 | tables. | |
570 | ||
571 | __ maketables_pcre_ | |
572 | ||
573 | ------------------------------------------------------------ | |
574 | ||
575 | fullinfo | |
576 | -------- | |
577 | ||
578 | [See *pcre_fullinfo* in the PCRE docs.] | |
579 | ||
580 | :funcdef:`r:fullinfo ()` | |
581 | ||
582 | This function returns a table containing information about the compiled pattern. | |
583 | The keys are strings formed in the following way: | |
584 | ``PCRE_INFO_CAPTURECOUNT`` -> ``"CAPTURECOUNT"``. The values are numbers. | |
585 | ||
586 | ------------------------------------------------------------ | |
587 | ||
588 | .. _dfa_exec_pcre: | |
589 | ||
590 | dfa_exec | |
591 | -------- | |
592 | ||
593 | [PCRE 6.0 and later. See *pcre_dfa_exec* in the PCRE docs.] | |
594 | ||
595 | :funcdef:`r:dfa_exec (subj, [init], [ef], [ovecsize], [wscount])` | |
596 | ||
597 | The method matches a compiled regular expression *r* against a given subject | |
598 | string *subj*, using a DFA matching algorithm. | |
599 | ||
600 | +----------+-------------------------------------+--------+-------------+ | |
601 | |Parameter | Description | Type |Default Value| | |
602 | +==========+=====================================+========+=============+ | |
603 | | r |regex object produced by new |userdata| n/a | | |
604 | +----------+-------------------------------------+--------+-------------+ | |
605 | | subj |subject | string | n/a | | |
606 | +----------+-------------------------------------+--------+-------------+ | |
607 | | [init] |start offset in the subject | number | 1 | | |
608 | | |(can be negative) | | | | |
609 | +----------+-------------------------------------+--------+-------------+ | |
610 | | [ef] |execution flags (bitwise OR) | number | ef_ | | |
611 | +----------+-------------------------------------+--------+-------------+ | |
612 | |[ovecsize]|size of the array for result offsets | number | 100 | | |
613 | +----------+-------------------------------------+--------+-------------+ | |
614 | |[wscount] |number of elements in the working | number | 50 | | |
615 | | |space array | | | | |
616 | +----------+-------------------------------------+--------+-------------+ | |
617 | ||
618 | **Returns on success (either full or partial match):** | |
619 | 1. The start point of the matches found (a number). | |
620 | 2. A table containing the end points of the matches found, the longer matches | |
621 | first. | |
622 | 3. The return value of the underlying *pcre_dfa_exec* call (a number). | |
623 | ||
624 | **Returns on failure (no match):** | |
625 | 1. ``nil`` | |
626 | ||
627 | **Example:** | |
628 | If there are 3 matches found starting at offset 10 and ending at offsets 15, 20 | |
629 | and 25 then the function returns the following: 10, { 25,20,15 }, 3. | |
630 | ||
631 | ------------------------------------------------------------ | |
632 | ||
633 | .. _maketables_pcre: | |
634 | ||
635 | maketables | |
636 | ---------- | |
637 | ||
638 | [See *pcre_maketables* in the PCRE docs.] | |
639 | ||
640 | :funcdef:`rex_pcre.maketables ()` | |
641 | ||
642 | Creates a set of character tables corresponding to the current locale and | |
643 | returns it as a userdata. The returned value can be passed to any Lrexlib | |
644 | function accepting the *locale* parameter. | |
645 | ||
646 | ------------------------------------------------------------ | |
647 | ||
648 | config | |
649 | ------ | |
650 | ||
651 | [PCRE 4.0 and later. See *pcre_config* in the PCRE docs.] | |
652 | ||
653 | :funcdef:`rex_pcre.config ([tb])` | |
654 | ||
655 | This function returns a table containing the values of the configuration | |
656 | parameters used at PCRE library build-time. Those parameters (numbers) are | |
657 | keyed by their names (strings). If the table argument *tb* is supplied then it | |
658 | is used as the output table, else a new table is created. | |
659 | ||
660 | ------------------------------------------------------------ | |
661 | ||
662 | .. _version: | |
663 | ||
664 | version | |
665 | ------- | |
666 | ||
667 | [See *pcre_version* in the PCRE docs.] | |
668 | ||
669 | :funcdef:`rex_pcre.version ()` | |
670 | ||
671 | This function returns a string containing the version of the used PCRE library | |
672 | and its release date. | |
673 | ||
674 | ------------------------------------------------------------ | |
675 | ||
676 | PCRE2-only functions and methods | |
677 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
678 | ||
679 | new | |
680 | --- | |
681 | ||
682 | :funcdef:`rex.new (patt, [cf], [lo])` | |
683 | ||
684 | The locale (*lo*) can be either a string (e.g., "French_France.1252"), or a | |
685 | userdata obtained from a call to maketables__. The default value, used when | |
686 | the parameter is not supplied or ``nil``, is the built-in PCRE2 set of character | |
687 | tables. | |
688 | ||
689 | __ maketables_pcre2_ | |
690 | ||
691 | ------------------------------------------------------------ | |
692 | ||
693 | patterninfo | |
694 | ----------- | |
695 | ||
696 | [See *pcre2_pattern_info* in the PCRE2 docs.] | |
697 | ||
698 | :funcdef:`r:patterninfo ()` | |
699 | ||
700 | This function returns a table containing information about the compiled pattern. | |
701 | The keys are strings formed in the following way: | |
702 | ``PCRE2_INFO_CAPTURECOUNT`` -> ``"CAPTURECOUNT"``. The values are numbers. | |
703 | ||
704 | ------------------------------------------------------------ | |
705 | ||
706 | dfa_exec | |
707 | -------- | |
708 | ||
709 | [See *pcre2_dfa_match* in the PCRE2 docs.] | |
710 | ||
711 | :funcdef:`r:dfa_exec (subj, [init], [ef], [ovecsize], [wscount])` | |
712 | ||
713 | The method matches a compiled regular expression *r* against a given subject | |
714 | string *subj*, using a DFA matching algorithm. | |
715 | ||
716 | +----------+-------------------------------------+--------+-------------+ | |
717 | |Parameter | Description | Type |Default Value| | |
718 | +==========+=====================================+========+=============+ | |
719 | | r |regex object produced by new |userdata| n/a | | |
720 | +----------+-------------------------------------+--------+-------------+ | |
721 | | subj |subject | string | n/a | | |
722 | +----------+-------------------------------------+--------+-------------+ | |
723 | | [init] |start offset in the subject | number | 1 | | |
724 | | |(can be negative) | | | | |
725 | +----------+-------------------------------------+--------+-------------+ | |
726 | | [ef] |execution flags (bitwise OR) | number | ef_ | | |
727 | +----------+-------------------------------------+--------+-------------+ | |
728 | |[ovecsize]|size of the array for result offsets | number | 100 | | |
729 | +----------+-------------------------------------+--------+-------------+ | |
730 | |[wscount] |number of elements in the working | number | 50 | | |
731 | | |space array | | | | |
732 | +----------+-------------------------------------+--------+-------------+ | |
733 | ||
734 | **Returns on success (either full or partial match):** | |
735 | 1. The start point of the matches found (a number). | |
736 | 2. A table containing the end points of the matches found, the longer matches | |
737 | first. | |
738 | 3. The return value of the underlying *pcre2_dfa_match* call (a number). | |
739 | ||
740 | **Returns on failure (no match):** | |
741 | 1. ``nil`` | |
742 | ||
743 | **Example:** | |
744 | If there are 3 matches found starting at offset 10 and ending at offsets 15, 20 | |
745 | and 25 then the function returns the following: 10, { 25,20,15 }, 3. | |
746 | ||
747 | ------------------------------------------------------------ | |
748 | ||
749 | jit_compile | |
750 | ----------- | |
751 | ||
752 | [See *pcre2_jit_compile* in the PCRE2 docs.] | |
753 | ||
754 | :funcdef:`r:jit_compile ([options])` | |
755 | ||
756 | Parameter *options* is a number (a bitwise OR of separate options; | |
757 | it defaults to ``PCRE2_JIT_COMPLETE``). | |
758 | ||
759 | The method returns ``true`` on success or ``false`` + error message string on failure. | |
760 | ||
761 | ------------------------------------------------------------ | |
762 | ||
763 | .. _maketables_pcre2: | |
764 | ||
765 | maketables | |
766 | ---------- | |
767 | ||
768 | [See *pcre2_maketables* in the PCRE2 docs.] | |
769 | ||
770 | :funcdef:`rex_pcre2.maketables ()` | |
771 | ||
772 | Creates a set of character tables corresponding to the current locale and | |
773 | returns it as a userdata. The returned value can be passed to any Lrexlib | |
774 | function accepting the *locale* parameter. | |
775 | ||
776 | ------------------------------------------------------------ | |
777 | ||
778 | config | |
779 | ------ | |
780 | ||
781 | [See *pcre2_config* in the PCRE2 docs.] | |
782 | ||
783 | :funcdef:`rex_pcre2.config ([tb])` | |
784 | ||
785 | This function returns a table containing the values of the configuration | |
786 | parameters used at PCRE2 library build-time. Those parameters (numbers) are | |
787 | keyed by their names (strings). If the table argument *tb* is supplied then it | |
788 | is used as the output table, else a new table is created. | |
789 | ||
790 | ------------------------------------------------------------ | |
791 | ||
792 | version | |
793 | ------- | |
794 | ||
795 | [See *pcre2_config(PCRE2_CONFIG_VERSION)* in the PCRE2 docs.] | |
796 | ||
797 | :funcdef:`rex_pcre2.version ()` | |
798 | ||
799 | This function returns a string containing the version of the used PCRE2 library | |
800 | and its release date. | |
801 | ||
802 | ------------------------------------------------------------ | |
803 | ||
804 | GNU-only functions and methods | |
805 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
806 | ||
807 | new | |
808 | --- | |
809 | ||
810 | :funcdef:`rex.new (patt, [cf], [tr])` | |
811 | ||
812 | If the compilation flags (*cf*) are not supplied or ``nil``, the default syntax | |
813 | is ``SYNTAX_POSIX_EXTENDED``. Note that this is not the same as passing a value | |
814 | of zero, which is the same as ``SYNTAX_EMACS``. | |
815 | ||
816 | The *translation* parameter (*tr*) is a map of eight-bit character codes (0 to | |
817 | 255 inclusive) to 8-bit characters (strings). If this parameter is given, the | |
818 | pattern is translated at compilation time, and each string to be matched is | |
819 | translated when it is being matched. | |
820 | ||
821 | ||
822 | Oniguruma-only functions and methods | |
823 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
824 | ||
825 | new | |
826 | --- | |
827 | ||
828 | :funcdef:`rex.new (patt, [cf], [enc], [syn])` | |
829 | ||
830 | The *encoding* parameter (*enc*) must be one of the predefined strings that are | |
831 | formed from the ONIG_ENCODING_xxx identifiers defined in oniguruma.h, by means | |
832 | of omitting the ONIG_ENCODING\_ part. For example, ONIG_ENCODING_UTF8 becomes | |
833 | ``"UTF8"`` on the Lua side. The default value, used when the parameter is not | |
834 | supplied or ``nil``, is ``"ASCII"``. | |
835 | ||
836 | If the caller-supplied value of this parameter is not one of the predefined | |
837 | "encoding" strings, an error is raised. | |
838 | ||
839 | The *syntax* parameter (*syn*) must be one of the predefined strings that are | |
840 | formed from the ONIG_SYNTAX_xxx identifiers defined in oniguruma.h, by means of | |
841 | omitting the ONIG_SYNTAX\_ part. For example, ONIG_SYNTAX_JAVA becomes | |
842 | ``"JAVA"`` on the Lua side. The default value, used when the parameter is not | |
843 | supplied or ``nil``, is either ``"RUBY"`` (at start-up), or the value set by the | |
844 | last setdefaultsyntax_ call. | |
845 | ||
846 | If the caller-supplied value of the *syntax* parameter is not one of the | |
847 | predefined "syntax" strings, an error is raised. | |
848 | ||
849 | setdefaultsyntax | |
850 | ---------------- | |
851 | ||
852 | :funcdef:`rex_onig.setdefaultsyntax (syntax)` | |
853 | ||
854 | This function sets the default syntax for the Oniguruma library, according to the | |
855 | value of the string *syntax*. The specified syntax will be further used for | |
856 | interpreting string regex patterns by all relevant functions, unless the *syntax* | |
857 | argument is passed to those functions explicitly. | |
858 | ||
859 | **Returns:** nothing | |
860 | ||
861 | **Examples:** | |
862 | ||
863 | 1. ``rex_onig.setdefaultsyntax ("ASIS") -- use plain text syntax as the default`` | |
864 | 2. ``rex_onig.setdefaultsyntax ("PERL") -- use PERL regex syntax as the default`` | |
865 | ||
866 | ------------------------------------------------------------ | |
867 | ||
868 | version | |
869 | ------- | |
870 | ||
871 | [See *onig_version* in the Oniguruma docs.] | |
872 | ||
873 | :funcdef:`rex_onig.version ()` | |
874 | ||
875 | This function returns a string containing the version of the used Oniguruma | |
876 | library. | |
877 | ||
878 | ------------------------------------------------------------ | |
879 | ||
880 | capturecount | |
881 | ------------ | |
882 | ||
883 | [See *onig_number_of_captures* in the Oniguruma docs.] | |
884 | ||
885 | :funcdef:`r:capturecount ()` | |
886 | ||
887 | Returns the number of captures in the pattern. | |
888 | ||
889 | ------------------------------------------------------------ | |
890 | ||
891 | TRE-only functions and methods | |
892 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
893 | ||
894 | new | |
895 | --- | |
896 | ||
897 | :funcdef:`rex.new (patt, [cf])` | |
898 | ||
899 | atfind | |
900 | ------- | |
901 | ||
902 | :funcdef:`r:atfind (subj, params, [init], [ef])` | |
903 | ||
904 | The method searches for the first match of the compiled regexp *r* in the string | |
905 | *subj*, starting from offset *init*, subject to the approximate matching parameters *params* and execution flags *ef*. | |
906 | ||
907 | +---------+-----------------------------------+--------+-------------+ | |
908 | |Parameter| Description | Type |Default Value| | |
909 | +=========+===================================+========+=============+ | |
910 | | r |regex object produced by new |userdata| n/a | | |
911 | +---------+-----------------------------------+--------+-------------+ | |
912 | | subj |subject | string | n/a | | |
913 | +---------+-----------------------------------+--------+-------------+ | |
914 | | params |Approximate matching parameters. | table |n/a | | |
915 | | |The values are integers. | | | | |
916 | | |The valid string key values are: | |(Default | | |
917 | | |``cost_ins``, ``cost_del``, | |value for | | |
918 | | |``cost_subst``, ``max_cost``, | |a missing | | |
919 | | |``max_ins``, ``max_del``, | |field is 0) | | |
920 | | |``max_subst``, ``max_err`` | | | | |
921 | +---------+-----------------------------------+--------+-------------+ | |
922 | | [init] |start offset in the subject | number | 1 | | |
923 | | |(can be negative) | | | | |
924 | +---------+-----------------------------------+--------+-------------+ | |
925 | | [ef] |execution flags (bitwise OR) | number | ef_ | | |
926 | +---------+-----------------------------------+--------+-------------+ | |
927 | ||
928 | **Returns on success:** | |
929 | 1. The start point of the match (a number). | |
930 | 2. The end point of the match (a number). | |
931 | 3. Substring matches ("captures" in Lua terminology) are returned as a third | |
932 | result, in the array part of a table. Positions where the corresponding | |
933 | sub-pattern did not participate in the match contain ``false``. | |
934 | The hash part of the table contains additional information on the match, | |
935 | in the following fields: ``cost``, ``num_ins``, ``num_del`` and ``num_subst``. | |
936 | ||
937 | **Returns on failure:** | |
938 | 1. ``nil`` | |
939 | ||
940 | ------------------------------------------------------------ | |
941 | ||
942 | aexec | |
943 | ------ | |
944 | ||
945 | :funcdef:`r:aexec (subj, params, [init], [ef])` | |
946 | ||
947 | The method searches for the first match of the compiled regexp *r* in the string | |
948 | *subj*, starting from offset *init*, subject to the approximate matching parameters *params* and execution flags *ef*. | |
949 | ||
950 | +---------+-----------------------------------+--------+-------------+ | |
951 | |Parameter| Description | Type |Default Value| | |
952 | +=========+===================================+========+=============+ | |
953 | | r |regex object produced by new |userdata| n/a | | |
954 | +---------+-----------------------------------+--------+-------------+ | |
955 | | subj |subject | string | n/a | | |
956 | +---------+-----------------------------------+--------+-------------+ | |
957 | | params |Approximate matching parameters. | table |n/a | | |
958 | | |The values are integers. | | | | |
959 | | |The valid string key values are: | |(Default | | |
960 | | |``cost_ins``, ``cost_del``, | |value for | | |
961 | | |``cost_subst``, ``max_cost``, | |a missing | | |
962 | | |``max_ins``, ``max_del``, | |field is 0) | | |
963 | | |``max_subst``, ``max_err`` | | | | |
964 | +---------+-----------------------------------+--------+-------------+ | |
965 | | [init] |start offset in the subject | number | 1 | | |
966 | | |(can be negative) | | | | |
967 | +---------+-----------------------------------+--------+-------------+ | |
968 | | [ef] |execution flags (bitwise OR) | number | ef_ | | |
969 | +---------+-----------------------------------+--------+-------------+ | |
970 | ||
971 | **Returns on success:** | |
972 | 1. The start point of the first match (a number). | |
973 | 2. The end point of the first match (a number). | |
974 | 3. The offsets of substring matches ("captures" in Lua terminology) are | |
975 | returned as a third result, in the array part of a table. Positions where | |
976 | the corresponding sub-pattern did not participate in the match contain | |
977 | ``false``. The hash part of the table contains additional information on | |
978 | the match, in the following fields: ``cost``, ``num_ins``, ``num_del`` and | |
979 | ``num_subst``. | |
980 | ||
981 | **Returns on failure:** | |
982 | 1. ``nil`` | |
983 | ||
984 | ------------------------------------------------------------ | |
985 | ||
986 | have_approx | |
987 | ------------ | |
988 | ||
989 | :funcdef:`r:have_approx ()` | |
990 | ||
991 | The method returns ``true`` if the compiled pattern uses approximate matching, | |
992 | and ``false`` if not. | |
993 | ||
994 | ------------------------------------------------------------ | |
995 | ||
996 | have_backrefs | |
997 | -------------- | |
998 | ||
999 | :funcdef:`r:have_backrefs ()` | |
1000 | ||
1001 | The method returns ``true`` if the compiled pattern has back references, | |
1002 | and ``false`` if not. | |
1003 | ||
1004 | ------------------------------------------------------------ | |
1005 | ||
1006 | config | |
1007 | ------ | |
1008 | ||
1009 | [See *tre_config* in the TRE docs.] | |
1010 | ||
1011 | :funcdef:`rex_tre.config ([tb])` | |
1012 | ||
1013 | This function returns a table containing the values of the configuration | |
1014 | parameters used at TRE library build-time. Those parameters are | |
1015 | keyed by their names. If the table argument *tb* is supplied then it | |
1016 | is used as the output table, else a new table is created. | |
1017 | ||
1018 | ------------------------------------------------------------ | |
1019 | ||
1020 | rex_tre.version | |
1021 | --------------- | |
1022 | ||
1023 | [See *tre_version* in the TRE docs.] | |
1024 | ||
1025 | :funcdef:`rex_tre.version ()` | |
1026 | ||
1027 | This function returns a string containing the version of the used TRE library. | |
1028 | ||
1029 | ------------------------------------------------------------ | |
1030 | ||
1031 | Incompatibilities with previous versions | |
1032 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
1033 | ||
1034 | **Incompatibilities between versions 2.8 and 2.7:** | |
1035 | ||
1036 | 1. In the functions searching for multiple matches every empty match adjacent | |
1037 | to the previous match is discarded. | |
1038 | ||
1039 | **Incompatibilities between versions 2.6 and 2.5:** | |
1040 | ||
1041 | 1. Removed function ``plainfind``. | |
1042 | #. Global variables (e.g. *rex_posix*, *rex_pcre*, etc.) are not created | |
1043 | by default. This can be changed at the stage of compilation by adding | |
1044 | ``-DREX_CREATEGLOBALVAR`` to ``CFLAGS``. | |
1045 | ||
1046 | **Incompatibilities between versions 2.2 and 2.1:** | |
1047 | ||
1048 | 1. gsub_: a special "break" return of *repl* function is deprecated. | |
1049 | #. (PCRE) gsub_, gmatch_: after finding an empty match at the current | |
1050 | position, the functions try to find a non-empty match anchored to the same | |
1051 | position. | |
1052 | ||
1053 | **Incompatibilities between versions 2.1 and 2.0:** | |
1054 | ||
1055 | 1. match_, find_, tfind_, exec_, dfa_exec__: only one value (a ``nil``) is | |
1056 | returned when the subject does not match the pattern. Any other failure | |
1057 | generates an error. | |
1058 | ||
1059 | __ dfa_exec_pcre_ | |
1060 | ||
1061 | **Incompatibilities between versions 2.0 and 1.19:** | |
1062 | ||
1063 | 1. Lua 5.1 is required | |
1064 | #. Functions ``newPCRE`` and ``newPOSIX`` renamed to new | |
1065 | #. Functions ``flagsPCRE`` and ``flagsPOSIX`` renamed to flags_ | |
1066 | #. Function ``versionPCRE`` renamed to version_ | |
1067 | #. Method ``match`` renamed to tfind_ | |
1068 | #. Method ``gmatch`` removed (similar functionality is provided by function | |
1069 | gmatch_) | |
1070 | #. Methods tfind_ and exec_: 2 values are returned on failure | |
1071 | #. (PCRE) exec_: the returned table may additionally contain *named | |
1072 | subpatterns* |
0 | -- Generate rockspecs from a prototype with variants | |
1 | ||
2 | local tree = require "std.tree" | |
3 | ||
-- Both command-line arguments are mandatory. On misuse, print the usage
-- line to stderr and exit with a failure status so that calling scripts
-- and make rules can detect the error.
if select ("#", ...) < 2 then
  io.stderr:write "Usage: mkrockspecs PACKAGE VERSION\n"
  os.exit (1)
end

-- Deliberately global: rockspecs.lua is run through loadfile and reads
-- `version` from the global environment.
package_name = select (1, ...)
version = select (2, ...)
11 | ||
-- Render a value as Lua source text.
--   strings : quoted with %q
--   tables  : "{", then one "key = value," line per non-numeric key,
--             then one "value," line per array element, then "}"
--   others  : tostring
-- `indent` is the prefix for lines inside a table (default ""); nested
-- tables get one more space, and the closing brace uses `indent` with its
-- last two characters removed.
function format (x, indent)
  indent = indent or ""
  local kind = type (x)
  if kind == "string" then
    return string.format ("%q", x)
  elseif kind ~= "table" then
    return tostring (x)
  end

  local child_indent = indent.." "
  local parts = {"{\n"}
  -- Named (non-numeric) fields first ...
  for key, value in pairs (x) do
    if type (key) ~= "number" then
      parts[#parts + 1] = indent..key.." = "..format (value, child_indent)..",\n"
    end
  end
  -- ... then the array part, in order.
  for _, value in ipairs (x) do
    parts[#parts + 1] = indent..format (value, child_indent)..",\n"
  end
  parts[#parts + 1] = indent:sub (1, -3).."}"
  return table.concat (parts)
end
31 | ||
-- Write one rockspec file per flavour listed in rockspecs.lua and lint it.
-- rockspecs.lua interpolates the global `flavour`, so it is loaded once
-- (with an empty flavour) just to enumerate the keys, and again for each
-- flavour to obtain correctly interpolated data.
flavour = "" -- a global, visible in loadfile
for f in pairs (assert (loadfile ("rockspecs.lua")) ()) do
  if f ~= "default" then
    local specfile = package_name.."-"..(f ~= "" and f:lower ().."-" or "")..version.."-1.rockspec"
    -- assert passes io.open's error message on if the file cannot be created.
    local h = assert (io.open (specfile, "w"))
    flavour = f
    local specs = assert (loadfile ("rockspecs.lua")) () -- reload to get current flavour interpolated
    -- Flavour-specific fields are merged over the shared defaults.
    local spec = tree.merge (tree (specs.default), tree (specs[f]))
    local s = ""
    for i, v in pairs (spec) do
      s = s..i.." = "..format (v, " ").."\n"
    end
    h:write (s)
    h:close ()
    os.execute ("luarocks lint " .. specfile)
  end
end
0 | -- Rockspec data | |
1 | ||
2 | -- Variables to be interpolated: | |
3 | -- | |
4 | -- flavour: regex library | |
5 | -- version | |
6 | ||
-- Preprocessor defines passed to every module.
local version_define = "VERSION=\""..version.."\""
local defines = {version_define}
-- PCRE2 additionally needs its code unit width; only that rock gets it.
local pcre2_defines = {version_define, "PCRE2_CODE_UNIT_WIDTH=8"}
-- Release tags use dashes instead of dots (e.g. 2.9.1 -> rel-2-9-1).
local version_dashed = version:gsub ("%.", "-")

-- FIXME: When Lua 5.1 support is dropped, use an env argument with
-- loadfile instead of wrapping in a table
return {

  -- Fields shared by all flavours; the per-flavour tables below are merged
  -- over this one by mkrockspecs.
  default = {
    package = "Lrexlib-"..flavour,
    version = version.."-1",
    source = {
      -- GitHub no longer serves the unauthenticated git:// protocol.
      url = "git+https://github.com/rrthomas/lrexlib.git",
      tag = "rel-"..version_dashed,
    },
    description = {
      summary = "Regular expression library binding ("..flavour.." flavour).",
      detailed = [[
Lrexlib is a regular expression library for Lua 5.1-5.4, which
provides bindings for several regular expression libraries.
This rock provides the ]]..flavour..[[ bindings.]],
      homepage = "http://github.com/rrthomas/lrexlib",
      license = "MIT/X11"
    },
    dependencies = {
      "lua >= 5.1"
    },
  },

  PCRE = {
    external_dependencies = {
      PCRE = {
        header = "pcre.h",
        library = "pcre"
      }
    },
    build = {
      type = "builtin",
      modules = {
        rex_pcre = {
          defines = defines,
          sources = {"src/common.c", "src/pcre/lpcre.c", "src/pcre/lpcre_f.c"},
          libraries = {"pcre"},
          incdirs = {"$(PCRE_INCDIR)"},
          libdirs = {"$(PCRE_LIBDIR)"}
        }
      }
    }
  },

  PCRE2 = {
    external_dependencies = {
      PCRE2 = {
        header = "pcre2.h",
        library = "pcre2-8"
      }
    },
    build = {
      type = "builtin",
      modules = {
        rex_pcre2 = {
          defines = pcre2_defines,
          sources = {"src/common.c", "src/pcre2/lpcre2.c", "src/pcre2/lpcre2_f.c"},
          libraries = {"pcre2-8"},
          incdirs = {"$(PCRE2_INCDIR)"},
          libdirs = {"$(PCRE2_LIBDIR)"}
        }
      }
    }
  },

  POSIX = {
    external_dependencies = {
      POSIX = {
        header = "regex.h",
      }
    },
    build = {
      type = "builtin",
      modules = {
        rex_posix = {
          defines = defines,
          sources = {"src/common.c", "src/posix/lposix.c"}
        }
      }
    }
  },

  oniguruma = {
    external_dependencies = {
      ONIG = {
        header = "oniguruma.h",
        library = "onig"
      }
    },
    build = {
      type = "builtin",
      modules = {
        rex_onig = {
          defines = defines,
          sources = {"src/common.c", "src/oniguruma/lonig.c", "src/oniguruma/lonig_f.c"},
          libraries = {"onig"},
          incdirs = {"$(ONIG_INCDIR)"},
          libdirs = {"$(ONIG_LIBDIR)"}
        }
      }
    }
  },

  TRE = {
    external_dependencies = {
      TRE = {
        header = "tre/tre.h",
        library = "tre"
      }
    },
    build = {
      type = "builtin",
      modules = {
        rex_tre = {
          defines = defines,
          sources = {"src/common.c", "src/tre/ltre.c" --[[, "src/tre/tre_w.c"]]},
          libraries = {"tre"},
          incdirs = {"$(TRE_INCDIR)"},
          libdirs = {"$(TRE_LIBDIR)"}
        }
      }
    }
  },

  GNU = {
    external_dependencies = {
      GNU = {
        header = "regex.h",
      }
    },
    build = {
      type = "builtin",
      modules = {
        rex_gnu = {
          defines = defines,
          sources = {"src/common.c", "src/gnu/lgnu.c"}
        }
      }
    }
  },

} -- close wrapper table
0 | /* algo.h */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #include "common.h" | |
4 | ||
5 | #define REX_VERSION "Lrexlib " VERSION | |
6 | ||
7 | /* Forward declarations */ | |
/* These functions are only declared here; their definitions are not in this
 * part of the file. */
8 | static void gmatch_pushsubject (lua_State *L, TArgExec *argE); | |
9 | static int findmatch_exec (TUserdata *ud, TArgExec *argE); | |
10 | static int split_exec (TUserdata *ud, TArgExec *argE, int offset); | |
11 | static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset); | |
12 | static int gmatch_exec (TUserdata *ud, TArgExec *argE); | |
13 | static int compile_regex (lua_State *L, const TArgComp *argC, TUserdata **pud); | |
14 | static int generate_error (lua_State *L, const TUserdata *ud, int errcode); | |
15 | ||
/* Stack index that test_ud() compares a candidate's metatable against:
 * LUA_ENVIRONINDEX on Lua 5.1, upvalue 1 on every other Lua version. */
16 | #if LUA_VERSION_NUM == 501 | |
17 | # define ALG_ENVIRONINDEX LUA_ENVIRONINDEX | |
18 | #else | |
19 | # define ALG_ENVIRONINDEX lua_upvalueindex(1) | |
20 | #endif | |
21 | ||
/* Size of one subject character in bytes; offsets are scaled by it.
 * Defaults to 1 unless defined before this point. */
22 | #ifndef ALG_CHARSIZE | |
23 | # define ALG_CHARSIZE 1 | |
24 | #endif | |
25 | ||
/* Routine that pre-parses a gsub replacement string; overridable. */
26 | #ifndef BUFFERZ_PUTREPSTRING | |
27 | # define BUFFERZ_PUTREPSTRING bufferZ_putrepstring | |
28 | #endif | |
29 | ||
/* Hook for reading extra compile-time arguments; expands to nothing
 * unless defined before this point. */
30 | #ifndef ALG_GETCARGS | |
31 | # define ALG_GETCARGS(a,b,c) | |
32 | #endif | |
33 | ||
/* Expands to nothing unless defined before this point. */
34 | #ifndef DO_NAMED_SUBPATTERNS | |
35 | #define DO_NAMED_SUBPATTERNS(a,b,c) | |
36 | #endif | |
37 | ||
/* Selector values; not used in the code visible here. NOTE(review):
 * presumably they tell the shared find/match/exec/tfind code which
 * method was called; confirm at the use sites. */
38 | #define METHOD_FIND 0 | |
39 | #define METHOD_MATCH 1 | |
40 | #define METHOD_EXEC 2 | |
41 | #define METHOD_TFIND 3 | |
43 | ||
44 | static int OptLimit (lua_State *L, int pos) { | |
45 | if (lua_isnoneornil (L, pos)) | |
46 | return GSUB_UNLIMITED; | |
47 | if (lua_isfunction (L, pos)) | |
48 | return GSUB_CONDITIONAL; | |
49 | if (lua_isnumber (L, pos)) { | |
50 | int a = lua_tointeger (L, pos); | |
51 | return a < 0 ? 0 : a; | |
52 | } | |
53 | return luaL_typerror (L, pos, "number or function"); | |
54 | } | |
55 | ||
56 | ||
57 | static int get_startoffset(lua_State *L, int stackpos, size_t len) { | |
58 | int startoffset = (int)luaL_optinteger(L, stackpos, 1); | |
59 | if(startoffset > 0) | |
60 | startoffset--; | |
61 | else if(startoffset < 0) { | |
62 | startoffset += len/ALG_CHARSIZE; | |
63 | if(startoffset < 0) | |
64 | startoffset = 0; | |
65 | } | |
66 | return startoffset*ALG_CHARSIZE; | |
67 | } | |
68 | ||
69 | ||
70 | static TUserdata* test_ud (lua_State *L, int pos) | |
71 | { | |
72 | TUserdata *ud; | |
73 | if (lua_getmetatable(L, pos) && | |
74 | lua_rawequal(L, -1, ALG_ENVIRONINDEX) && | |
75 | (ud = (TUserdata *)lua_touserdata(L, pos)) != NULL) { | |
76 | lua_pop(L, 1); | |
77 | return ud; | |
78 | } | |
79 | return NULL; | |
80 | } | |
81 | ||
82 | ||
83 | static TUserdata* check_ud (lua_State *L) | |
84 | { | |
85 | TUserdata *ud = test_ud(L, 1); | |
86 | if (ud == NULL) luaL_typerror(L, 1, REX_TYPENAME); | |
87 | return ud; | |
88 | } | |
89 | ||
90 | ||
91 | static void check_subject (lua_State *L, int pos, TArgExec *argE) | |
92 | { | |
93 | int stype; | |
94 | argE->text = lua_tolstring (L, pos, &argE->textlen); | |
95 | stype = lua_type (L, pos); | |
96 | if (stype != LUA_TSTRING && stype != LUA_TTABLE && stype != LUA_TUSERDATA) { | |
97 | luaL_typerror (L, pos, "string, table or userdata"); | |
98 | } else if (argE->text == NULL) { | |
99 | int type; | |
100 | lua_getfield (L, pos, "topointer"); | |
101 | if (lua_type (L, -1) != LUA_TFUNCTION) | |
102 | luaL_error (L, "subject has no topointer method"); | |
103 | lua_pushvalue (L, pos); | |
104 | lua_call (L, 1, 1); | |
105 | type = lua_type (L, -1); | |
106 | if (type != LUA_TLIGHTUSERDATA) | |
107 | luaL_error (L, "subject's topointer method returned %s (expected lightuserdata)", | |
108 | lua_typename (L, type)); | |
109 | argE->text = (const char*) lua_touserdata (L, -1); | |
110 | lua_pop (L, 1); | |
111 | #if LUA_VERSION_NUM == 501 | |
112 | if (luaL_callmeta (L, pos, "__len")) { | |
113 | if (lua_type (L, -1) != LUA_TNUMBER) | |
114 | luaL_argerror (L, pos, "subject's length is not a number"); | |
115 | argE->textlen = lua_tointeger (L, -1); | |
116 | lua_pop (L, 1); | |
117 | } | |
118 | else | |
119 | argE->textlen = lua_objlen (L, pos); | |
120 | #else | |
121 | argE->textlen = luaL_len (L, pos); | |
122 | #endif | |
123 | } | |
124 | } | |
125 | ||
126 | static void check_pattern (lua_State *L, int pos, TArgComp *argC) | |
127 | { | |
128 | if (lua_isstring (L, pos)) { | |
129 | argC->pattern = lua_tolstring (L, pos, &argC->patlen); | |
130 | argC->ud = NULL; | |
131 | } | |
132 | else if ((argC->ud = test_ud (L, pos)) == NULL) | |
133 | luaL_typerror(L, pos, "string or " REX_TYPENAME); | |
134 | } | |
135 | ||
136 | static void checkarg_new (lua_State *L, TArgComp *argC) { | |
137 | argC->pattern = luaL_checklstring (L, 1, &argC->patlen); | |
138 | argC->cflags = ALG_GETCFLAGS (L, 2); | |
139 | ALG_GETCARGS (L, 3, argC); | |
140 | } | |
141 | ||
142 | ||
143 | /* function gsub (s, patt, f, [n], [cf], [ef], [larg...]) */ | |
144 | static void checkarg_gsub (lua_State *L, TArgComp *argC, TArgExec *argE) { | |
145 | check_subject (L, 1, argE); | |
146 | check_pattern (L, 2, argC); | |
147 | lua_tostring (L, 3); /* converts number (if any) to string */ | |
148 | argE->reptype = lua_type (L, 3); | |
149 | if (argE->reptype != LUA_TSTRING && argE->reptype != LUA_TTABLE && | |
150 | argE->reptype != LUA_TFUNCTION) { | |
151 | luaL_typerror (L, 3, "string, table or function"); | |
152 | } | |
153 | argE->funcpos = 3; | |
154 | argE->funcpos2 = 4; | |
155 | argE->maxmatch = OptLimit (L, 4); | |
156 | argC->cflags = ALG_GETCFLAGS (L, 5); | |
157 | argE->eflags = (int)luaL_optinteger (L, 6, ALG_EFLAGS_DFLT); | |
158 | ALG_GETCARGS (L, 7, argC); | |
159 | } | |
160 | ||
161 | ||
162 | /* function count (s, patt, [cf], [ef], [larg...]) */ | |
163 | static void checkarg_count (lua_State *L, TArgComp *argC, TArgExec *argE) { | |
164 | check_subject (L, 1, argE); | |
165 | check_pattern (L, 2, argC); | |
166 | argC->cflags = ALG_GETCFLAGS (L, 3); | |
167 | argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT); | |
168 | ALG_GETCARGS (L, 5, argC); | |
169 | } | |
170 | ||
171 | ||
172 | /* function find (s, patt, [st], [cf], [ef], [larg...]) */ | |
173 | /* function match (s, patt, [st], [cf], [ef], [larg...]) */ | |
174 | static void checkarg_find_func (lua_State *L, TArgComp *argC, TArgExec *argE) { | |
175 | check_subject (L, 1, argE); | |
176 | check_pattern (L, 2, argC); | |
177 | argE->startoffset = get_startoffset (L, 3, argE->textlen); | |
178 | argC->cflags = ALG_GETCFLAGS (L, 4); | |
179 | argE->eflags = (int)luaL_optinteger (L, 5, ALG_EFLAGS_DFLT); | |
180 | ALG_GETCARGS (L, 6, argC); | |
181 | } | |
182 | ||
183 | ||
184 | /* function gmatch (s, patt, [cf], [ef], [larg...]) */ | |
185 | /* function split (s, patt, [cf], [ef], [larg...]) */ | |
186 | static void checkarg_gmatch_split (lua_State *L, TArgComp *argC, TArgExec *argE) { | |
187 | check_subject (L, 1, argE); | |
188 | check_pattern (L, 2, argC); | |
189 | argC->cflags = ALG_GETCFLAGS (L, 3); | |
190 | argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT); | |
191 | ALG_GETCARGS (L, 5, argC); | |
192 | } | |
193 | ||
194 | ||
195 | /* method r:tfind (s, [st], [ef]) */ | |
196 | /* method r:exec (s, [st], [ef]) */ | |
197 | /* method r:find (s, [st], [ef]) */ | |
198 | /* method r:match (s, [st], [ef]) */ | |
199 | static void checkarg_find_method (lua_State *L, TArgExec *argE, TUserdata **ud) { | |
200 | *ud = check_ud (L); | |
201 | check_subject (L, 2, argE); | |
202 | argE->startoffset = get_startoffset (L, 3, argE->textlen); | |
203 | argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT); | |
204 | } | |
205 | ||
206 | ||
207 | static int algf_new (lua_State *L) { | |
208 | TArgComp argC; | |
209 | checkarg_new (L, &argC); | |
210 | return compile_regex (L, &argC, NULL); | |
211 | } | |
212 | ||
213 | static void push_substrings (lua_State *L, TUserdata *ud, const char *text, | |
214 | TFreeList *freelist) { | |
215 | int i; | |
216 | if (lua_checkstack (L, ALG_NSUB(ud)) == 0) { | |
217 | if (freelist) | |
218 | freelist_free (freelist); | |
219 | luaL_error (L, "cannot add %d stack slots", ALG_NSUB(ud)); | |
220 | } | |
221 | for (i = 1; i <= ALG_NSUB(ud); i++) { | |
222 | ALG_PUSHSUB_OR_FALSE (L, ud, text, i); | |
223 | } | |
224 | } | |
225 | ||
/*
 * Lua entry point for gsub (s, patt, f, [n], [cf], [ef], [larg...]).
 * Scans the subject for matches of the pattern and builds an output string
 * in which matches are replaced according to the replacement argument
 * (string, table or function) and the limit argument.
 * On success pushes three values: the resulting string, the number of
 * matches found and the number of substitutions made, and returns 3.
 * Buffers are registered in 'freelist', which is freed before every exit
 * from this function, including the error exits.
 */
226 | static int algf_gsub (lua_State *L) { | |
227 | TUserdata *ud; | |
228 | TArgComp argC; | |
229 | TArgExec argE; | |
/* st: current scan offset; last_to: end offset of the previous match */
230 | int n_match = 0, n_subst = 0, st = 0, last_to = -1; | |
/* pBuf is where replacement text is written: BufOut normally, BufTemp
   while the limit is a function (GSUB_CONDITIONAL). */
231 | TBuffer BufOut, BufRep, BufTemp, *pBuf = &BufOut; | |
232 | TFreeList freelist; | |
233 | /*------------------------------------------------------------------*/ | |
234 | checkarg_gsub (L, &argC, &argE); | |
/* A pre-compiled regex passed as argument 2 is used as is and a copy of it
   is pushed; a string pattern is compiled here. */
235 | if (argC.ud) { | |
236 | ud = (TUserdata*) argC.ud; | |
237 | lua_pushvalue (L, 2); | |
238 | } | |
239 | else compile_regex (L, &argC, &ud); | |
240 | freelist_init (&freelist); | |
241 | /*------------------------------------------------------------------*/ | |
/* A string replacement is processed once into BufRep and replayed with
   bufferZ_next for every match. */
242 | if (argE.reptype == LUA_TSTRING) { | |
243 | buffer_init (&BufRep, 256, L, &freelist); | |
244 | BUFFERZ_PUTREPSTRING (&BufRep, argE.funcpos, ALG_NSUB(ud)); | |
245 | } | |
246 | /*------------------------------------------------------------------*/ | |
247 | if (argE.maxmatch == GSUB_CONDITIONAL) { | |
248 | buffer_init (&BufTemp, 1024, L, &freelist); | |
249 | pBuf = &BufTemp; | |
250 | } | |
251 | /*------------------------------------------------------------------*/ | |
252 | buffer_init (&BufOut, 1024, L, &freelist); | |
/* A negative maxmatch disables the match-count limit. NOTE(review):
   presumably GSUB_UNLIMITED and GSUB_CONDITIONAL are the negative values;
   confirm where they are defined. */
253 | while ((argE.maxmatch < 0 || n_match < argE.maxmatch) && st <= (int)argE.textlen) { | |
254 | int from, to, res; | |
255 | int curr_subst = 0; | |
256 | res = gsub_exec (ud, &argE, st); | |
257 | if (ALG_NOMATCH (res)) { | |
258 | break; | |
259 | } | |
260 | else if (!ALG_ISMATCH (res)) { | |
261 | freelist_free (&freelist); | |
262 | return generate_error (L, ud, res); | |
263 | } | |
/* Offsets of the whole match (sub-match 0) */
264 | from = ALG_BASE(st) + ALG_SUBBEG(ud,0); | |
265 | to = ALG_BASE(st) + ALG_SUBEND(ud,0); | |
266 | if (to == last_to) { /* discard an empty match adjacent to the previous match */ | |
267 | if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */ | |
268 | buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE); | |
269 | st += ALG_CHARSIZE; | |
270 | continue; | |
271 | } | |
272 | break; | |
273 | } | |
274 | last_to = to; | |
275 | ++n_match; | |
/* Copy the unmatched text that precedes this match. */
276 | if (st < from) { | |
277 | buffer_addlstring (&BufOut, argE.text + st, from - st); | |
278 | #ifdef ALG_PULL | |
279 | st = from; | |
280 | #endif | |
281 | } | |
282 | /*----------------------------------------------------------------*/ | |
/* String replacement: replay BufRep. An item with a non-NULL str is copied
   as is; otherwise num selects a sub-match to copy (0, or any num for
   which ALG_SUBVALID holds). */
283 | if (argE.reptype == LUA_TSTRING) { | |
284 | size_t iter = 0, num; | |
285 | const char *str; | |
286 | while (bufferZ_next (&BufRep, &iter, &num, &str)) { | |
287 | if (str) | |
288 | buffer_addlstring (pBuf, str, num); | |
289 | else if (num == 0 || ALG_SUBVALID (ud,num)) | |
290 | buffer_addlstring (pBuf, argE.text + ALG_BASE(st) + ALG_SUBBEG(ud,num), ALG_SUBLEN(ud,num)); | |
291 | } | |
292 | curr_subst = 1; | |
293 | } | |
294 | /*----------------------------------------------------------------*/ | |
/* Table replacement: the lookup key is the first capture (or false) when
   the pattern has captures, else the whole match. */
295 | else if (argE.reptype == LUA_TTABLE) { | |
296 | if (ALG_NSUB(ud) > 0) | |
297 | ALG_PUSHSUB_OR_FALSE (L, ud, argE.text + ALG_BASE(st), 1); | |
298 | else | |
299 | lua_pushlstring (L, argE.text + from, to - from); | |
300 | lua_gettable (L, argE.funcpos); | |
301 | } | |
302 | /*----------------------------------------------------------------*/ | |
/* Function replacement: called in protected mode with all captures, or with
   the whole match when there are none; one result is kept. */
303 | else if (argE.reptype == LUA_TFUNCTION) { | |
304 | int narg; | |
305 | lua_pushvalue (L, argE.funcpos); | |
306 | if (ALG_NSUB(ud) > 0) { | |
307 | push_substrings (L, ud, argE.text + ALG_BASE(st), &freelist); | |
308 | narg = ALG_NSUB(ud); | |
309 | } | |
310 | else { | |
311 | lua_pushlstring (L, argE.text + from, to - from); | |
312 | narg = 1; | |
313 | } | |
314 | if (0 != lua_pcall (L, narg, 1, 0)) { | |
315 | freelist_free (&freelist); | |
316 | return lua_error (L); /* re-raise the error */ | |
317 | } | |
318 | } | |
319 | /*----------------------------------------------------------------*/ | |
/* Interpret the table/function result on top of the stack: a string or
   number is the replacement; nil or false keeps the matched text; any
   other value is an error. */
320 | if (argE.reptype == LUA_TTABLE || argE.reptype == LUA_TFUNCTION) { | |
321 | if (lua_tostring (L, -1)) { | |
322 | buffer_addvalue (pBuf, -1); | |
323 | curr_subst = 1; | |
324 | } | |
325 | else if (!lua_toboolean (L, -1)) | |
326 | buffer_addlstring (pBuf, argE.text + from, to - from); | |
327 | else { | |
328 | freelist_free (&freelist); | |
329 | luaL_error (L, "invalid replacement value (a %s)", luaL_typename (L, -1)); | |
330 | } | |
/* In conditional mode the result stays on the stack: it is handed to the
   limit function below. */
331 | if (argE.maxmatch != GSUB_CONDITIONAL) | |
332 | lua_pop (L, 1); | |
333 | } | |
334 | /*----------------------------------------------------------------*/ | |
/* Conditional mode: the limit function at funcpos2 is called with the
   1-based start and the end of the match (in characters) and the proposed
   replacement; it returns two values. */
335 | if (argE.maxmatch == GSUB_CONDITIONAL) { | |
336 | /* Call the function */ | |
337 | lua_pushvalue (L, argE.funcpos2); | |
338 | lua_pushinteger (L, from/ALG_CHARSIZE + 1); | |
339 | lua_pushinteger (L, to/ALG_CHARSIZE); | |
340 | if (argE.reptype == LUA_TSTRING) | |
341 | buffer_pushresult (&BufTemp); | |
342 | else { | |
/* Move the replacement value kept above into the third argument slot. */
343 | lua_pushvalue (L, -4); | |
344 | lua_remove (L, -5); | |
345 | } | |
346 | if (0 != lua_pcall (L, 3, 2, 0)) { | |
347 | freelist_free (&freelist); | |
348 | lua_error (L); /* re-raise the error */ | |
349 | } | |
350 | /* Handle the 1-st return value */ | |
351 | if (lua_isstring (L, -2)) { /* coercion is allowed here */ | |
352 | buffer_addvalue (&BufOut, -2); /* rep2 */ | |
353 | curr_subst = 1; | |
354 | } | |
355 | else if (lua_toboolean (L, -2)) | |
356 | buffer_addbuffer (&BufOut, &BufTemp); /* rep1 */ | |
357 | else { | |
358 | buffer_addlstring (&BufOut, argE.text + from, to - from); /* "no" */ | |
359 | curr_subst = 0; | |
360 | } | |
361 | /* Handle the 2-nd return value */ | |
/* A number n replaces the conditional limit with n_match + n (negative n
   counts as 0); any other true value removes the limit; false or nil
   keeps asking for every match. */
362 | if (lua_type (L, -1) == LUA_TNUMBER) { /* no coercion is allowed here */ | |
363 | int n = lua_tointeger (L, -1); | |
364 | if (n < 0) /* n */ | |
365 | n = 0; | |
366 | argE.maxmatch = n_match + n; | |
367 | } | |
368 | else if (lua_toboolean (L, -1)) /* "yes to all" */ | |
369 | argE.maxmatch = GSUB_UNLIMITED; | |
370 | else | |
371 | buffer_clear (&BufTemp); | |
372 | ||
373 | lua_pop (L, 2); | |
/* Once conditional mode has ended, replacements go straight to BufOut. */
374 | if (argE.maxmatch != GSUB_CONDITIONAL) | |
375 | pBuf = &BufOut; | |
376 | } | |
377 | /*----------------------------------------------------------------*/ | |
378 | n_subst += curr_subst; | |
/* Continue after the match; after an empty match copy one character
   unchanged so that the scan always moves forward. */
379 | if (st < to) { | |
380 | st = to; | |
381 | } | |
382 | else if (st < (int)argE.textlen) { | |
383 | /* advance by 1 char (not replaced) */ | |
384 | buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE); | |
385 | st += ALG_CHARSIZE; | |
386 | } | |
387 | else break; | |
388 | } | |
389 | /*------------------------------------------------------------------*/ | |
/* Append the rest of the subject and push the three results. */
390 | buffer_addlstring (&BufOut, argE.text + st, argE.textlen - st); | |
391 | buffer_pushresult (&BufOut); | |
392 | lua_pushinteger (L, n_match); | |
393 | lua_pushinteger (L, n_subst); | |
394 | freelist_free (&freelist); | |
395 | return 3; | |
396 | } | |
397 | ||
398 | ||
/*
 * Lua entry point: counts the matches found in the subject text.
 * The regex is either the already-compiled userdata supplied by the caller
 * (argC.ud) or is compiled here from the pattern arguments.
 * Pushes one integer (the number of matches) and returns 1; an execution
 * error other than "no match" is reported through generate_error.
 */
static int algf_count (lua_State *L) {
  TUserdata *ud;
  TArgComp argC;
  TArgExec argE;
  /* st: offset the next search starts at; last_to: end of the last counted match */
  int n_match = 0, st = 0, last_to = -1;
  /*------------------------------------------------------------------*/
  checkarg_count (L, &argC, &argE);
  if (argC.ud) {
    ud = (TUserdata*) argC.ud;
    lua_pushvalue (L, 2);
  }
  else compile_regex (L, &argC, &ud);
  /*------------------------------------------------------------------*/
  while (st <= (int)argE.textlen) {
    int to, res;
    res = gsub_exec (ud, &argE, st);
    if (ALG_NOMATCH (res)) {
      break;
    }
    else if (!ALG_ISMATCH (res)) {
      return generate_error (L, ud, res);
    }
    to = ALG_BASE(st) + ALG_SUBEND(ud,0);
    if (to == last_to) { /* discard an empty match adjacent to the previous match */
      if (st < (int)argE.textlen) { /* advance by 1 char */
        st += ALG_CHARSIZE;
        continue;
      }
      break;
    }
    last_to = to;
    ++n_match;
#ifdef ALG_PULL
    {
      /* move the search position up to the start of the match */
      int from = ALG_BASE(st) + ALG_SUBBEG(ud,0);
      if (st < from)
        st = from;
    }
#endif
    /*----------------------------------------------------------------*/
    if (st < to) {
      /* non-empty progress: resume right after the match */
      st = to;
    }
    else if (st < (int)argE.textlen) {
      /* advance by 1 char (not replaced) */
      st += ALG_CHARSIZE;
    }
    else break;
  }
  /*------------------------------------------------------------------*/
  lua_pushinteger (L, n_match);
  return 1;
}
452 | ||
453 | ||
454 | static int finish_generic_find (lua_State *L, TUserdata *ud, TArgExec *argE, | |
455 | int method, int res) | |
456 | { | |
457 | if (ALG_ISMATCH (res)) { | |
458 | if (method == METHOD_FIND) | |
459 | ALG_PUSHOFFSETS (L, ud, ALG_BASE(argE->startoffset), 0); | |
460 | if (ALG_NSUB(ud)) /* push captures */ | |
461 | push_substrings (L, ud, argE->text, NULL); | |
462 | else if (method != METHOD_FIND) { | |
463 | ALG_PUSHSUB (L, ud, argE->text, 0); | |
464 | return 1; | |
465 | } | |
466 | return (method == METHOD_FIND) ? ALG_NSUB(ud) + 2 : ALG_NSUB(ud); | |
467 | } | |
468 | else if (ALG_NOMATCH (res)) | |
469 | return lua_pushnil (L), 1; | |
470 | else | |
471 | return generate_error (L, ud, res); | |
472 | } | |
473 | ||
474 | ||
475 | static int generic_find_func (lua_State *L, int method) { | |
476 | TUserdata *ud; | |
477 | TArgComp argC; | |
478 | TArgExec argE; | |
479 | int res; | |
480 | ||
481 | checkarg_find_func (L, &argC, &argE); | |
482 | if (argE.startoffset > (int)argE.textlen) | |
483 | return lua_pushnil (L), 1; | |
484 | ||
485 | if (argC.ud) { | |
486 | ud = (TUserdata*) argC.ud; | |
487 | lua_pushvalue (L, 2); | |
488 | } | |
489 | else compile_regex (L, &argC, &ud); | |
490 | res = findmatch_exec (ud, &argE); | |
491 | return finish_generic_find (L, ud, &argE, method, res); | |
492 | } | |
493 | ||
494 | ||
495 | static int algf_find (lua_State *L) { | |
496 | return generic_find_func (L, METHOD_FIND); | |
497 | } | |
498 | ||
499 | ||
500 | static int algf_match (lua_State *L) { | |
501 | return generic_find_func (L, METHOD_MATCH); | |
502 | } | |
503 | ||
504 | ||
505 | static int gmatch_iter (lua_State *L) { | |
506 | int last_end, res; | |
507 | TArgExec argE; | |
508 | TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1)); | |
509 | argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen); | |
510 | argE.eflags = lua_tointeger (L, lua_upvalueindex (3)); | |
511 | argE.startoffset = lua_tointeger (L, lua_upvalueindex (4)); | |
512 | last_end = lua_tointeger (L, lua_upvalueindex (5)); | |
513 | ||
514 | while (1) { | |
515 | if (argE.startoffset > (int)argE.textlen) | |
516 | return 0; | |
517 | res = gmatch_exec (ud, &argE); | |
518 | if (ALG_ISMATCH (res)) { | |
519 | int incr = 0; | |
520 | if (!ALG_SUBLEN(ud,0)) { /* no progress: prevent endless loop */ | |
521 | if (last_end == ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0)) { | |
522 | argE.startoffset += ALG_CHARSIZE; | |
523 | continue; | |
524 | } | |
525 | incr = ALG_CHARSIZE; | |
526 | } | |
527 | last_end = ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0); | |
528 | lua_pushinteger(L, last_end + incr); /* update start offset */ | |
529 | lua_replace (L, lua_upvalueindex (4)); | |
530 | lua_pushinteger(L, last_end); /* update last end of match */ | |
531 | lua_replace (L, lua_upvalueindex (5)); | |
532 | /* push either captures or entire match */ | |
533 | if (ALG_NSUB(ud)) { | |
534 | push_substrings (L, ud, argE.text, NULL); | |
535 | return ALG_NSUB(ud); | |
536 | } | |
537 | else { | |
538 | ALG_PUSHSUB (L, ud, argE.text, 0); | |
539 | return 1; | |
540 | } | |
541 | } | |
542 | else if (ALG_NOMATCH (res)) | |
543 | return 0; | |
544 | else | |
545 | return generate_error (L, ud, res); | |
546 | } | |
547 | } | |
548 | ||
549 | ||
/*
 * Iterator function behind split().
 * Upvalues: 1 = compiled regex userdata, 2 = subject string, 3 = exec flags,
 *           4 = start offset of the current section, 5 = incr (extra offset
 *           added to the search position; -1 marks "iteration finished"),
 *           6 = end offset of the last match.
 * On a match, pushes the text preceding the separator followed by the
 * separator's captures (or the whole separator when there are no captures).
 * When no further separator is found, pushes the remaining text once and
 * marks the iteration as finished.
 */
static int split_iter (lua_State *L) {
  int incr, last_end, newoffset, res;
  TArgExec argE;
  TUserdata *ud    = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1));
  argE.text        = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen);
  argE.eflags      = lua_tointeger (L, lua_upvalueindex (3));
  argE.startoffset = lua_tointeger (L, lua_upvalueindex (4));
  incr             = lua_tointeger (L, lua_upvalueindex (5));
  last_end         = lua_tointeger (L, lua_upvalueindex (6));

  /* the previous call already returned the final section */
  if (incr < 0)
    return 0;

  while (1) {
    if ((newoffset = argE.startoffset + incr) > (int)argE.textlen)
      break;
    res = split_exec (ud, &argE, newoffset);
    if (ALG_ISMATCH (res)) {
      if (!ALG_SUBLEN(ud,0)) { /* no progress: prevent endless loop */
        /* NOTE(review): this comparison uses ALG_BASE(argE.startoffset) while
           every other offset below is based on ALG_BASE(newoffset); the two
           differ when incr > 0 and ALG_BASE is not constant - confirm that
           this is intended. */
        if (last_end == ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0)) {
          incr += ALG_CHARSIZE;
          continue;
        }
      }
      lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset and last_end */
      lua_pushvalue (L, -1);
      lua_replace (L, lua_upvalueindex (4));
      lua_replace (L, lua_upvalueindex (6));
      lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : ALG_CHARSIZE); /* update incr */
      lua_replace (L, lua_upvalueindex (5));
      /* push text preceding the match */
      lua_pushlstring (L, argE.text + argE.startoffset,
                       ALG_SUBBEG(ud,0) + ALG_BASE(newoffset) - argE.startoffset);
      /* push either captures or entire match */
      if (ALG_NSUB(ud)) {
        push_substrings (L, ud, argE.text + ALG_BASE(newoffset), NULL);
        return 1 + ALG_NSUB(ud);
      }
      else {
        ALG_PUSHSUB (L, ud, argE.text + ALG_BASE(newoffset), 0);
        return 2;
      }
    }
    else if (ALG_NOMATCH (res))
      break;
    else
      return generate_error (L, ud, res);
  }
  lua_pushinteger (L, -1); /* mark as last iteration */
  lua_replace (L, lua_upvalueindex (5)); /* incr = -1 */
  /* push the remainder of the subject as the final section */
  lua_pushlstring (L, argE.text+argE.startoffset, argE.textlen-argE.startoffset);
  return 1;
}
603 | ||
604 | ||
605 | static int algf_gmatch (lua_State *L) | |
606 | { | |
607 | TArgComp argC; | |
608 | TArgExec argE; | |
609 | checkarg_gmatch_split (L, &argC, &argE); | |
610 | if (argC.ud) | |
611 | lua_pushvalue (L, 2); | |
612 | else | |
613 | compile_regex (L, &argC, NULL); /* 1-st upvalue: ud */ | |
614 | gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */ | |
615 | lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */ | |
616 | lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */ | |
617 | lua_pushinteger (L, -1); /* 5-th upvalue: last end of match */ | |
618 | lua_pushcclosure (L, gmatch_iter, 5); | |
619 | return 1; | |
620 | } | |
621 | ||
622 | static int algf_split (lua_State *L) | |
623 | { | |
624 | TArgComp argC; | |
625 | TArgExec argE; | |
626 | checkarg_gmatch_split (L, &argC, &argE); | |
627 | if (argC.ud) | |
628 | lua_pushvalue (L, 2); | |
629 | else | |
630 | compile_regex (L, &argC, NULL); /* 1-st upvalue: ud */ | |
631 | gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */ | |
632 | lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */ | |
633 | lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */ | |
634 | lua_pushinteger (L, 0); /* 5-th upvalue: incr */ | |
635 | lua_pushinteger (L, -1); /* 6-th upvalue: last_end */ | |
636 | lua_pushcclosure (L, split_iter, 6); | |
637 | return 1; | |
638 | } | |
639 | ||
640 | ||
641 | static void push_substring_table (lua_State *L, TUserdata *ud, const char *text) { | |
642 | int i; | |
643 | lua_newtable (L); | |
644 | for (i = 1; i <= ALG_NSUB(ud); i++) { | |
645 | ALG_PUSHSUB_OR_FALSE (L, ud, text, i); | |
646 | lua_rawseti (L, -2, i); | |
647 | } | |
648 | } | |
649 | ||
650 | ||
651 | static void push_offset_table (lua_State *L, TUserdata *ud, int startoffset) { | |
652 | int i, j; | |
653 | lua_newtable (L); | |
654 | for (i=1, j=1; i <= ALG_NSUB(ud); i++) { | |
655 | if (ALG_SUBVALID (ud,i)) { | |
656 | ALG_PUSHSTART (L, ud, startoffset, i); | |
657 | lua_rawseti (L, -2, j++); | |
658 | ALG_PUSHEND (L, ud, startoffset, i); | |
659 | lua_rawseti (L, -2, j++); | |
660 | } | |
661 | else { | |
662 | lua_pushboolean (L, 0); | |
663 | lua_rawseti (L, -2, j++); | |
664 | lua_pushboolean (L, 0); | |
665 | lua_rawseti (L, -2, j++); | |
666 | } | |
667 | } | |
668 | } | |
669 | ||
670 | ||
671 | static int generic_find_method (lua_State *L, int method) { | |
672 | TUserdata *ud; | |
673 | TArgExec argE; | |
674 | int res; | |
675 | ||
676 | checkarg_find_method (L, &argE, &ud); | |
677 | if (argE.startoffset > (int)argE.textlen) | |
678 | return lua_pushnil(L), 1; | |
679 | ||
680 | res = findmatch_exec (ud, &argE); | |
681 | if (ALG_ISMATCH (res)) { | |
682 | switch (method) { | |
683 | case METHOD_EXEC: | |
684 | ALG_PUSHOFFSETS (L, ud, ALG_BASE(argE.startoffset), 0); | |
685 | push_offset_table (L, ud, ALG_BASE(argE.startoffset)); | |
686 | DO_NAMED_SUBPATTERNS (L, ud, argE.text); | |
687 | return 3; | |
688 | case METHOD_TFIND: | |
689 | ALG_PUSHOFFSETS (L, ud, ALG_BASE(argE.startoffset), 0); | |
690 | push_substring_table (L, ud, argE.text); | |
691 | DO_NAMED_SUBPATTERNS (L, ud, argE.text); | |
692 | return 3; | |
693 | case METHOD_MATCH: | |
694 | case METHOD_FIND: | |
695 | return finish_generic_find (L, ud, &argE, method, res); | |
696 | } | |
697 | return 0; | |
698 | } | |
699 | else if (ALG_NOMATCH (res)) | |
700 | return lua_pushnil (L), 1; | |
701 | else | |
702 | return generate_error(L, ud, res); | |
703 | } | |
704 | ||
705 | ||
706 | static int algm_find (lua_State *L) { | |
707 | return generic_find_method (L, METHOD_FIND); | |
708 | } | |
709 | static int algm_match (lua_State *L) { | |
710 | return generic_find_method (L, METHOD_MATCH); | |
711 | } | |
712 | static int algm_tfind (lua_State *L) { | |
713 | return generic_find_method (L, METHOD_TFIND); | |
714 | } | |
715 | static int algm_exec (lua_State *L) { | |
716 | return generic_find_method (L, METHOD_EXEC); | |
717 | } | |
718 | ||
/*
 * Builds the method metatable and the library function table.
 *   r_methods   - methods installed into the metatable
 *   r_functions - functions installed into the library table
 *   name        - text embedded into the library's _VERSION string
 * On return the library table is on top of the Lua stack, with the
 * metatable just below it.
 */
static void alg_register (lua_State *L, const luaL_Reg *r_methods,
                          const luaL_Reg *r_functions, const char *name) {
  /* Create a new function environment to serve as a metatable for methods. */
#if LUA_VERSION_NUM == 501
  lua_newtable (L);
  lua_pushvalue (L, -1);
  lua_replace (L, LUA_ENVIRONINDEX);
  luaL_register (L, NULL, r_methods);
#else
  /* no function environments here: the metatable is handed to every
     method as its single upvalue instead */
  luaL_newmetatable(L, REX_TYPENAME);
  lua_pushvalue(L, -1);
  luaL_setfuncs (L, r_methods, 1);
#endif
  lua_pushvalue(L, -1); /* mt.__index = mt */
  lua_setfield(L, -2, "__index");

  /* Register functions. */
  lua_createtable(L, 0, 8);
#if LUA_VERSION_NUM == 501
  luaL_register (L, NULL, r_functions);
#else
  /* library functions receive the metatable as their single upvalue too */
  lua_pushvalue(L, -2);
  luaL_setfuncs (L, r_functions, 1);
#endif
#ifdef REX_CREATEGLOBALVAR
  /* optionally publish the library table as a global named REX_LIBNAME */
  lua_pushvalue(L, -1);
  lua_setglobal(L, REX_LIBNAME);
#endif
  lua_pushfstring (L, REX_VERSION" (for %s)", name);
  lua_setfield (L, -2, "_VERSION");
#ifndef REX_NOEMBEDDEDTEST
  /* test-only helper, compiled out when REX_NOEMBEDDEDTEST is defined */
  lua_pushcfunction (L, newmembuffer);
  lua_setfield (L, -2, "_newmembuffer");
#endif
}
0 | /* common.c */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #include <stdlib.h> | |
4 | #include <ctype.h> | |
5 | #include <string.h> | |
6 | #include "lua.h" | |
7 | #include "lauxlib.h" | |
8 | #include "common.h" | |
9 | ||
10 | #define N_ALIGN sizeof(int) | |
11 | ||
12 | /* the table must be on Lua stack top */ | |
13 | int get_int_field (lua_State *L, const char* field) | |
14 | { | |
15 | int val; | |
16 | lua_getfield (L, -1, field); | |
17 | val = lua_tointeger (L, -1); | |
18 | lua_pop (L, 1); | |
19 | return val; | |
20 | } | |
21 | ||
22 | /* the table must be on Lua stack top */ | |
23 | void set_int_field (lua_State *L, const char* field, int val) | |
24 | { | |
25 | lua_pushinteger (L, val); | |
26 | lua_setfield (L, -2, field); | |
27 | } | |
28 | ||
29 | void *Lmalloc(lua_State *L, size_t size) { | |
30 | void *ud; | |
31 | lua_Alloc lalloc = lua_getallocf(L, &ud); | |
32 | return lalloc(ud, NULL, 0, size); | |
33 | } | |
34 | ||
35 | void *Lrealloc(lua_State *L, void *p, size_t osize, size_t nsize) { | |
36 | void *ud; | |
37 | lua_Alloc lalloc = lua_getallocf(L, &ud); | |
38 | return lalloc(ud, p, osize, nsize); | |
39 | } | |
40 | ||
41 | void Lfree(lua_State *L, void *p, size_t osize) { | |
42 | void *ud; | |
43 | lua_Alloc lalloc = lua_getallocf(L, &ud); | |
44 | lalloc(ud, p, osize, 0); | |
45 | } | |
46 | ||
47 | /* This function fills a table with string-number pairs. | |
48 | The table can be passed as the 1-st lua-function parameter, | |
49 | otherwise it is created. The return value is the filled table. | |
50 | */ | |
51 | int get_flags (lua_State *L, const flag_pair **arrs) { | |
52 | const flag_pair *p; | |
53 | const flag_pair **pp; | |
54 | int nparams = lua_gettop(L); | |
55 | ||
56 | if(nparams == 0) | |
57 | lua_newtable(L); | |
58 | else { | |
59 | if(!lua_istable(L, 1)) | |
60 | luaL_argerror(L, 1, "not a table"); | |
61 | if(nparams > 1) | |
62 | lua_pushvalue(L, 1); | |
63 | } | |
64 | ||
65 | for(pp=arrs; *pp; ++pp) { | |
66 | for(p=*pp; p->key; ++p) { | |
67 | lua_pushstring(L, p->key); | |
68 | lua_pushinteger(L, p->val); | |
69 | lua_rawset(L, -3); | |
70 | } | |
71 | } | |
72 | return 1; | |
73 | } | |
74 | ||
75 | const char *get_flag_key (const flag_pair *fp, int val) { | |
76 | for (; fp->key; ++fp) { | |
77 | if (fp->val == val) | |
78 | return fp->key; | |
79 | } | |
80 | return NULL; | |
81 | } | |
82 | ||
83 | /* Classes */ | |
84 | ||
85 | /* | |
86 | * class TFreeList | |
87 | * *************** | |
88 | * Simple array of pointers to TBuffer's. | |
89 | * The array has fixed capacity (not expanded automatically). | |
90 | */ | |
91 | ||
92 | void freelist_init (TFreeList *fl) { | |
93 | fl->top = 0; | |
94 | } | |
95 | ||
96 | void freelist_add (TFreeList *fl, TBuffer *buf) { | |
97 | fl->list[fl->top++] = buf; | |
98 | } | |
99 | ||
100 | void freelist_free (TFreeList *fl) { | |
101 | while (fl->top > 0) | |
102 | buffer_free (fl->list[--fl->top]); | |
103 | } | |
104 | ||
105 | /* | |
106 | * class TBuffer | |
107 | * ************* | |
108 | * Auto-extensible array of characters for building long strings incrementally. | |
109 | * * Differs from luaL_Buffer in that: | |
110 | * * its operations do not change Lua stack top position | |
111 | * * buffer_addvalue does not extract the value from Lua stack | |
112 | * * buffer_pushresult does not have to be the last operation | |
113 | * * Uses TFreeList class: | |
114 | * * for inserting itself into a TFreeList instance for future clean-up | |
115 | * * calls freelist_free prior to calling luaL_error. | |
116 | * * Has specialized "Z-operations" for maintaining mixed string/integer | |
117 | * array: bufferZ_addlstring, bufferZ_addnum and bufferZ_next. | |
118 | * * if the array is intended to be "mixed", then the methods | |
119 | * buffer_addlstring and buffer_addvalue must not be used | |
120 | * (the application will crash on bufferZ_next). | |
121 | * * conversely, if the array is not intended to be "mixed", | |
122 | * then the method bufferZ_next must not be used. | |
123 | */ | |
124 | ||
125 | enum { ID_NUMBER, ID_STRING }; | |
126 | ||
127 | void buffer_init (TBuffer *buf, size_t sz, lua_State *L, TFreeList *fl) { | |
128 | buf->arr = (char*) Lmalloc(L, sz); | |
129 | if (!buf->arr) { | |
130 | freelist_free (fl); | |
131 | luaL_error (L, "malloc failed"); | |
132 | } | |
133 | buf->size = sz; | |
134 | buf->top = 0; | |
135 | buf->L = L; | |
136 | buf->freelist = fl; | |
137 | freelist_add (fl, buf); | |
138 | } | |
139 | ||
140 | void buffer_free (TBuffer *buf) { | |
141 | Lfree(buf->L, buf->arr, buf->size); | |
142 | } | |
143 | ||
144 | void buffer_clear (TBuffer *buf) { | |
145 | buf->top = 0; | |
146 | } | |
147 | ||
148 | void buffer_pushresult (TBuffer *buf) { | |
149 | lua_pushlstring (buf->L, buf->arr, buf->top); | |
150 | } | |
151 | ||
152 | void buffer_addbuffer (TBuffer *trg, TBuffer *src) { | |
153 | buffer_addlstring (trg, src->arr, src->top); | |
154 | } | |
155 | ||
156 | void buffer_addlstring (TBuffer *buf, const void *src, size_t sz) { | |
157 | size_t newtop = buf->top + sz; | |
158 | if (newtop > buf->size) { | |
159 | char *p = (char*) Lrealloc (buf->L, buf->arr, buf->size, 2 * newtop); /* 2x expansion */ | |
160 | if (!p) { | |
161 | freelist_free (buf->freelist); | |
162 | luaL_error (buf->L, "realloc failed"); | |
163 | } | |
164 | buf->arr = p; | |
165 | buf->size = 2 * newtop; | |
166 | } | |
167 | if (src) | |
168 | memcpy (buf->arr + buf->top, src, sz); | |
169 | buf->top = newtop; | |
170 | } | |
171 | ||
172 | void buffer_addvalue (TBuffer *buf, int stackpos) { | |
173 | size_t len; | |
174 | const char *p = lua_tolstring (buf->L, stackpos, &len); | |
175 | buffer_addlstring (buf, p, len); | |
176 | } | |
177 | ||
178 | void bufferZ_addlstring (TBuffer *buf, const void *src, size_t len) { | |
179 | int n; | |
180 | size_t header[2] = { ID_STRING }; | |
181 | header[1] = len; | |
182 | buffer_addlstring (buf, header, sizeof (header)); | |
183 | buffer_addlstring (buf, src, len); | |
184 | n = len % N_ALIGN; | |
185 | if (n) buffer_addlstring (buf, NULL, N_ALIGN - n); | |
186 | } | |
187 | ||
188 | void bufferZ_addnum (TBuffer *buf, size_t num) { | |
189 | size_t header[2] = { ID_NUMBER }; | |
190 | header[1] = num; | |
191 | buffer_addlstring (buf, header, sizeof (header)); | |
192 | } | |
193 | ||
194 | /* 1. When called repeatedly on the same TBuffer, its existing data | |
195 | is discarded and overwritten by the new data. | |
196 | 2. The TBuffer's array is never shrunk by this function. | |
197 | */ | |
198 | void bufferZ_putrepstring (TBuffer *BufRep, int reppos, int nsub) { | |
199 | char dbuf[] = { 0, 0 }; | |
200 | size_t replen; | |
201 | const char *p = lua_tolstring (BufRep->L, reppos, &replen); | |
202 | const char *end = p + replen; | |
203 | BufRep->top = 0; | |
204 | while (p < end) { | |
205 | const char *q; | |
206 | for (q = p; q < end && *q != '%'; ++q) | |
207 | {} | |
208 | if (q != p) | |
209 | bufferZ_addlstring (BufRep, p, q - p); | |
210 | if (q < end) { | |
211 | if (++q < end) { /* skip % */ | |
212 | if (isdigit (*q)) { | |
213 | int num; | |
214 | *dbuf = *q; | |
215 | num = strtol (dbuf, NULL, 10); | |
216 | if (num == 1 && nsub == 0) | |
217 | num = 0; | |
218 | else if (num > nsub) { | |
219 | freelist_free (BufRep->freelist); | |
220 | luaL_error (BufRep->L, "invalid capture index"); | |
221 | } | |
222 | bufferZ_addnum (BufRep, num); | |
223 | } | |
224 | else bufferZ_addlstring (BufRep, q, 1); | |
225 | } | |
226 | p = q + 1; | |
227 | } | |
228 | else break; | |
229 | } | |
230 | } | |
231 | ||
232 | /****************************************************************************** | |
233 | The intended use of this function is as follows: | |
234 | size_t iter = 0; | |
235 | while (bufferZ_next (buf, &iter, &num, &str)) { | |
236 | if (str) do_something_with_string (str, num); | |
237 | else do_something_with_number (num); | |
238 | } | |
239 | ******************************************************************************* | |
240 | */ | |
241 | int bufferZ_next (TBuffer *buf, size_t *iter, size_t *num, const char **str) { | |
242 | if (*iter < buf->top) { | |
243 | size_t *ptr_header = (size_t*)(buf->arr + *iter); | |
244 | *num = ptr_header[1]; | |
245 | *iter += 2 * sizeof (size_t); | |
246 | *str = NULL; | |
247 | if (*ptr_header == ID_STRING) { | |
248 | int n; | |
249 | *str = buf->arr + *iter; | |
250 | *iter += *num; | |
251 | n = *iter % N_ALIGN; | |
252 | if (n) *iter += (N_ALIGN - n); | |
253 | } | |
254 | return 1; | |
255 | } | |
256 | return 0; | |
257 | } | |
258 | ||
#if LUA_VERSION_NUM > 501
/*
 * Raises an argument error for argument `narg` with a message of the form
 * "<tname> expected, got <actual type name>".
 */
int luaL_typerror (lua_State *L, int narg, const char *tname) {
  const char *actual = luaL_typename(L, narg);
  const char *msg = lua_pushfstring(L, "%s expected, got %s", tname, actual);
  return luaL_argerror(L, narg, msg);
}
#endif
266 | ||
267 | #ifndef REX_NOEMBEDDEDTEST | |
268 | static int ud_topointer (lua_State *L) { | |
269 | lua_pushlightuserdata (L, lua_touserdata (L, 1)); | |
270 | return 1; | |
271 | } | |
272 | ||
273 | static int ud_len (lua_State *L) { | |
274 | lua_pushinteger (L, lua_objlen (L, 1)); | |
275 | return 1; | |
276 | } | |
277 | ||
278 | /* for testing purposes only */ | |
279 | int newmembuffer (lua_State *L) { | |
280 | size_t len; | |
281 | const char* s = luaL_checklstring (L, 1, &len); | |
282 | void *ud = lua_newuserdata (L, len); | |
283 | memcpy (ud, s, len); | |
284 | lua_newtable (L); /* metatable */ | |
285 | lua_pushvalue (L, -1); | |
286 | lua_setfield (L, -2, "__index"); /* metatable.__index = metatable */ | |
287 | lua_pushcfunction (L, ud_topointer); | |
288 | lua_setfield (L, -2, "topointer"); | |
289 | lua_pushcfunction (L, ud_len); | |
290 | lua_setfield (L, -2, "__len"); | |
291 | lua_setmetatable (L, -2); | |
292 | return 1; | |
293 | } | |
294 | #endif /* #ifndef REX_NOEMBEDDEDTEST */ |
0 | /* common.h */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #ifndef COMMON_H | |
4 | #define COMMON_H | |
5 | ||
6 | #include "lua.h" | |
7 | ||
8 | #if LUA_VERSION_NUM > 501 | |
9 | # define lua_objlen lua_rawlen | |
10 | int luaL_typerror (lua_State *L, int narg, const char *tname); | |
11 | #endif | |
12 | ||
13 | /* REX_API can be overridden from the command line or Makefile */ | |
14 | #ifndef REX_API | |
15 | # define REX_API LUALIB_API | |
16 | #endif | |
17 | ||
18 | /* Special values for maxmatch in gsub. They all must be negative. */ | |
19 | #define GSUB_UNLIMITED -1 | |
20 | #define GSUB_CONDITIONAL -2 | |
21 | ||
22 | /* Common structs and functions */ | |
23 | ||
/* One named integer constant; arrays of these end with an entry whose key is NULL. */
typedef struct {
  const char* key;   /* name of the flag */
  int         val;   /* numeric value of the flag */
} flag_pair;
28 | ||
/*
 * Arguments of a regex compilation. Fields marked with a library name are
 * only meaningful for the binding of that library.
 */
typedef struct {            /* compile arguments */
  const char * pattern;     /* pattern text */
  size_t       patlen;      /* length of the pattern text */
  void       * ud;          /* already compiled regex, if one was supplied */
  int          cflags;      /* compilation flags */
  const char * locale;             /* PCRE, Oniguruma */
  const unsigned char * tables;    /* PCRE */
  int          tablespos;          /* PCRE */
  void       * syntax;             /* Oniguruma */
  const unsigned char * translate; /* GNU */
  int          gnusyn;             /* GNU */
} TArgComp;
41 | ||
/*
 * Arguments of a regex execution. Fields marked with a function or library
 * name are only meaningful there.
 */
typedef struct {            /* exec arguments */
  const char * text;        /* subject text */
  size_t       textlen;     /* length of the subject text */
  int          startoffset; /* offset in the subject where the search starts */
  int          eflags;      /* execution flags */
  int          funcpos;
  int          maxmatch;    /* may hold one of the GSUB_* special values */
  int          funcpos2;      /* used with gsub */
  int          reptype;       /* used with gsub */
  size_t       ovecsize;      /* PCRE: dfa_exec */
  size_t       wscount;       /* PCRE: dfa_exec */
} TArgExec;
54 | ||
struct tagFreeList;   /* forward declaration */

/* Growable array of bytes used for building strings incrementally. */
struct tagBuffer {
  size_t      size;     /* allocated size of arr */
  size_t      top;      /* number of bytes in use */
  char      * arr;      /* the data */
  lua_State * L;        /* Lua state providing the allocator and the stack */
  struct tagFreeList * freelist;   /* list this buffer is registered in */
};

/* Fixed-capacity list of buffers to be released together. */
struct tagFreeList {
  struct tagBuffer * list[16];
  int top;              /* number of registered buffers */
};

typedef struct tagBuffer TBuffer;
typedef struct tagFreeList TFreeList;
72 | ||
73 | void freelist_init (TFreeList *fl); | |
74 | void freelist_add (TFreeList *fl, TBuffer *buf); | |
75 | void freelist_free (TFreeList *fl); | |
76 | ||
77 | void buffer_init (TBuffer *buf, size_t sz, lua_State *L, TFreeList *fl); | |
78 | void buffer_free (TBuffer *buf); | |
79 | void buffer_clear (TBuffer *buf); | |
80 | void buffer_addbuffer (TBuffer *trg, TBuffer *src); | |
81 | void buffer_addlstring (TBuffer *buf, const void *src, size_t sz); | |
82 | void buffer_addvalue (TBuffer *buf, int stackpos); | |
83 | void buffer_pushresult (TBuffer *buf); | |
84 | ||
85 | void bufferZ_putrepstring (TBuffer *buf, int reppos, int nsub); | |
86 | int bufferZ_next (TBuffer *buf, size_t *iter, size_t *len, const char **str); | |
87 | void bufferZ_addlstring (TBuffer *buf, const void *src, size_t len); | |
88 | void bufferZ_addnum (TBuffer *buf, size_t num); | |
89 | ||
90 | int get_int_field (lua_State *L, const char* field); | |
91 | void set_int_field (lua_State *L, const char* field, int val); | |
92 | int get_flags (lua_State *L, const flag_pair **arr); | |
93 | const char *get_flag_key (const flag_pair *fp, int val); | |
94 | void *Lmalloc (lua_State *L, size_t size); | |
95 | void *Lrealloc (lua_State *L, void *p, size_t osize, size_t nsize); | |
96 | void Lfree (lua_State *L, void *p, size_t size); | |
97 | ||
98 | #ifndef REX_NOEMBEDDEDTEST | |
99 | int newmembuffer (lua_State *L); | |
100 | #endif | |
101 | ||
102 | #endif |
0 | /* lgnu.c - Lua binding of GNU regular expressions library */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #include <stdlib.h> | |
4 | #include <string.h> | |
5 | #include <ctype.h> | |
6 | #include "lua.h" | |
7 | #include "lauxlib.h" | |
8 | #include "../common.h" | |
9 | ||
10 | #define _GNU_SOURCE | |
11 | #ifndef __USE_GNU | |
12 | # define __USE_GNU | |
13 | #endif | |
14 | #ifndef REX_GNU_INCLUDE | |
15 | # include <regex.h> | |
16 | #else | |
17 | # include REX_GNU_INCLUDE | |
18 | #endif | |
19 | ||
20 | /* These 2 settings may be redefined from the command-line or the makefile. | |
21 | * They should be kept in sync between themselves and with the target name. | |
22 | */ | |
23 | #ifndef REX_LIBNAME | |
24 | # define REX_LIBNAME "rex_gnu" | |
25 | #endif | |
26 | #ifndef REX_OPENLIB | |
27 | # define REX_OPENLIB luaopen_rex_gnu | |
28 | #endif | |
29 | ||
30 | #define REX_TYPENAME REX_LIBNAME"_regex" | |
31 | ||
32 | #define ALG_CFLAGS_DFLT RE_SYNTAX_POSIX_EXTENDED | |
33 | #define ALG_EFLAGS_DFLT 0 | |
34 | ||
35 | #define ALG_GETCFLAGS(L,pos) (int)luaL_optinteger(L, pos, ALG_CFLAGS_DFLT) | |
36 | ||
37 | static const unsigned char *gettranslate (lua_State *L, int pos); | |
38 | #define ALG_GETCARGS(L,pos,argC) argC->translate = gettranslate (L, pos) | |
39 | ||
/*
 * Accessors used by the generic algorithms for the result of a GNU regex
 * search. The `ud` argument is parenthesized so that any pointer expression
 * may be passed safely.
 */
#define ALG_NOMATCH(res)   ((res) == -1 || (res) == -2)
#define ALG_ISMATCH(res)   ((res) >= 0)
#define ALG_SUBBEG(ud,n)   ((ud)->match.start[n])
#define ALG_SUBEND(ud,n)   ((ud)->match.end[n])
#define ALG_SUBLEN(ud,n)   (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n))
#define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0)
#define ALG_NSUB(ud)       ((int)(ud)->r.re_nsub)

/* Push capture n as a string */
#define ALG_PUSHSUB(L,ud,text,n) \
  lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n))

/* Push capture n as a string, or false if the capture is not valid */
#define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \
  (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))

/* Push 1-based start / end positions of capture n, shifted by offs */
#define ALG_PUSHSTART(L,ud,offs,n)   lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1)
#define ALG_PUSHEND(L,ud,offs,n)     lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n))
#define ALG_PUSHOFFSETS(L,ud,offs,n) \
  (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n))

#define ALG_BASE(st)  (st)
60 | ||
/* Userdata of a compiled GNU regex. */
typedef struct {
  struct re_pattern_buffer r;      /* the compiled pattern */
  struct re_registers      match;  /* capture offsets filled in by a search */
  int                      freed;
  const char             * errmsg; /* message of a failed compilation */
} TGnu;
67 | ||
68 | #define TUserdata TGnu | |
69 | ||
70 | #include "../algo.h" | |
71 | ||
72 | /* Functions | |
73 | ****************************************************************************** | |
74 | */ | |
75 | ||
76 | /* Execution flags, which we need to simulate as GNU does not use flags for this. */ | |
77 | #define GNU_NOTBOL 1 | |
78 | #define GNU_NOTEOL 2 | |
79 | #define GNU_BACKWARD 4 | |
80 | ||
81 | static int generate_error (lua_State *L, const TUserdata *ud, int errcode) { | |
82 | const char *errmsg; | |
83 | switch (errcode) { | |
84 | case 0: | |
85 | errmsg = ud->errmsg; | |
86 | break; | |
87 | case -1: | |
88 | errmsg = "no match"; | |
89 | break; | |
90 | case -2: | |
91 | errmsg = "internal error in GNU regex"; | |
92 | break; | |
93 | default: | |
94 | errmsg = "unknown error"; | |
95 | } | |
96 | return luaL_error (L, "%s", errmsg); | |
97 | } | |
98 | ||
99 | #define ALG_TRANSLATE_SIZE (UCHAR_MAX + 1) | |
100 | static const unsigned char *gettranslate (lua_State *L, int pos) { | |
101 | unsigned i; | |
102 | const unsigned char *translate; | |
103 | ||
104 | if (lua_isnoneornil (L, pos)) | |
105 | return NULL; | |
106 | ||
107 | translate = (const unsigned char *) malloc (ALG_TRANSLATE_SIZE); | |
108 | if (!translate) | |
109 | luaL_error (L, "malloc failed"); | |
110 | memset ((unsigned char *) translate, 0, ALG_TRANSLATE_SIZE); /* initialize all members to 0 */ | |
111 | for (i = 0; i <= UCHAR_MAX; i++) { | |
112 | lua_pushinteger (L, i); | |
113 | lua_gettable (L, pos); | |
114 | if (lua_tostring (L, -1)) | |
115 | ((unsigned char *) translate)[i] = *lua_tostring (L, -1); | |
116 | lua_pop (L, 1); | |
117 | } | |
118 | return translate; | |
119 | } | |
120 | ||
121 | static void seteflags (TGnu *ud, TArgExec *argE) { | |
122 | ud->r.not_bol = (argE->eflags & GNU_NOTBOL) != 0; | |
123 | ud->r.not_eol = (argE->eflags & GNU_NOTEOL) != 0; | |
124 | } | |
125 | ||
126 | static int compile_regex (lua_State *L, const TArgComp *argC, TGnu **pud) { | |
127 | const char *res; | |
128 | TGnu *ud; | |
129 | int ret; | |
130 | ||
131 | ud = (TGnu *)lua_newuserdata (L, sizeof (TGnu)); | |
132 | memset (ud, 0, sizeof (TGnu)); /* initialize all members to 0 */ | |
133 | ||
134 | re_set_syntax (argC->cflags); | |
135 | ||
136 | /* translate table is never written to, so this cast is safe */ | |
137 | ud->r.translate = (unsigned char *) argC->translate; | |
138 | ||
139 | res = re_compile_pattern (argC->pattern, argC->patlen, &ud->r); | |
140 | if (res != NULL) { | |
141 | ud->errmsg = res; | |
142 | ret = generate_error (L, ud, 0); | |
143 | } else { | |
144 | lua_pushvalue (L, ALG_ENVIRONINDEX); | |
145 | lua_setmetatable (L, -2); | |
146 | ||
147 | if (pud) *pud = ud; | |
148 | ret = 1; | |
149 | } | |
150 | ||
151 | return ret; | |
152 | } | |
153 | ||
154 | static int gmatch_exec (TUserdata *ud, TArgExec *argE) { | |
155 | seteflags (ud, argE); | |
156 | if (argE->startoffset > 0) | |
157 | ud->r.not_bol = 1; | |
158 | argE->text += argE->startoffset; | |
159 | argE->textlen -= argE->startoffset; | |
160 | if (argE->eflags & GNU_BACKWARD) | |
161 | return re_search (&ud->r, argE->text, argE->textlen, argE->textlen, -argE->textlen, &ud->match); | |
162 | else | |
163 | return re_search (&ud->r, argE->text, argE->textlen, 0, argE->textlen, &ud->match); | |
164 | } | |
165 | ||
166 | static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { | |
167 | lua_pushlstring (L, argE->text, argE->textlen); | |
168 | } | |
169 | ||
170 | static int findmatch_exec (TGnu *ud, TArgExec *argE) { | |
171 | argE->text += argE->startoffset; | |
172 | argE->textlen -= argE->startoffset; | |
173 | seteflags (ud, argE); | |
174 | if (argE->eflags & GNU_BACKWARD) | |
175 | return re_search (&ud->r, argE->text, argE->textlen, argE->textlen, -argE->textlen, &ud->match); | |
176 | else | |
177 | return re_search (&ud->r, argE->text, argE->textlen, 0, argE->textlen, &ud->match); | |
178 | } | |
179 | ||
180 | static int gsub_exec (TGnu *ud, TArgExec *argE, int st) { | |
181 | seteflags (ud, argE); | |
182 | if (st > 0) | |
183 | ud->r.not_bol = 1; | |
184 | if (argE->eflags & GNU_BACKWARD) | |
185 | return re_search (&ud->r, argE->text + st, argE->textlen - st, argE->textlen - st, -(argE->textlen - st), &ud->match); | |
186 | else | |
187 | return re_search (&ud->r, argE->text + st, argE->textlen - st, 0, argE->textlen - st, &ud->match); | |
188 | } | |
189 | ||
190 | static int split_exec (TGnu *ud, TArgExec *argE, int offset) { | |
191 | seteflags (ud, argE); | |
192 | if (offset > 0) | |
193 | ud->r.not_bol = 1; | |
194 | if (argE->eflags & GNU_BACKWARD) | |
195 | return re_search (&ud->r, argE->text + offset, argE->textlen - offset, argE->textlen - offset, -(argE->textlen - offset), &ud->match); | |
196 | else | |
197 | return re_search (&ud->r, argE->text + offset, argE->textlen - offset, 0, argE->textlen - offset, &ud->match); | |
198 | } | |
199 | ||
200 | static int Gnu_gc (lua_State *L) { | |
201 | TGnu *ud = check_ud (L); | |
202 | if (ud->freed == 0) { /* precaution against "manual" __gc calling */ | |
203 | ud->freed = 1; | |
204 | regfree (&ud->r); | |
205 | free (ud->match.start); | |
206 | free (ud->match.end); | |
207 | } | |
208 | return 0; | |
209 | } | |
210 | ||
211 | static int Gnu_tostring (lua_State *L) { | |
212 | TGnu *ud = check_ud (L); | |
213 | if (ud->freed == 0) | |
214 | lua_pushfstring (L, "%s (%p)", REX_TYPENAME, (void*)ud); | |
215 | else | |
216 | lua_pushfstring (L, "%s (deleted)", REX_TYPENAME); | |
217 | return 1; | |
218 | } | |
219 | ||
220 | static flag_pair gnu_flags[] = | |
221 | { | |
222 | { "not_bol", GNU_NOTBOL }, | |
223 | { "not_eol", GNU_NOTEOL }, | |
224 | { "backward", GNU_BACKWARD }, | |
225 | /*---------------------------------------------------------------------------*/ | |
226 | { NULL, 0 } | |
227 | }; | |
228 | ||
229 | static flag_pair gnu_syntax_flags[] = { | |
230 | /* Syntax flag sets. */ | |
231 | { "SYNTAX_EMACS", RE_SYNTAX_EMACS }, | |
232 | { "SYNTAX_AWK", RE_SYNTAX_AWK }, | |
233 | { "SYNTAX_GNU_AWK", RE_SYNTAX_GNU_AWK }, | |
234 | { "SYNTAX_POSIX_AWK", RE_SYNTAX_POSIX_AWK }, | |
235 | { "SYNTAX_POSIX_AWK", RE_SYNTAX_POSIX_AWK }, | |
236 | { "SYNTAX_EGREP", RE_SYNTAX_EGREP }, | |
237 | { "SYNTAX_POSIX_EGREP", RE_SYNTAX_POSIX_EGREP }, | |
238 | { "SYNTAX_ED", RE_SYNTAX_ED }, | |
239 | { "SYNTAX_SED", RE_SYNTAX_SED }, | |
240 | { "SYNTAX_POSIX_AWK", RE_SYNTAX_POSIX_AWK }, | |
241 | { "SYNTAX_GREP", RE_SYNTAX_GREP }, | |
242 | { "SYNTAX_POSIX_BASIC", RE_SYNTAX_POSIX_BASIC }, | |
243 | { "SYNTAX_POSIX_MINIMAL_BASIC", RE_SYNTAX_POSIX_MINIMAL_BASIC }, | |
244 | { "SYNTAX_POSIX_EXTENDED", RE_SYNTAX_POSIX_EXTENDED }, | |
245 | { "SYNTAX_POSIX_MINIMAL_EXTENDED", RE_SYNTAX_POSIX_MINIMAL_EXTENDED }, | |
246 | ||
247 | /* Individual syntax flags. */ | |
248 | { "BACKSLASH_ESCAPE_IN_LISTS", RE_BACKSLASH_ESCAPE_IN_LISTS }, | |
249 | { "BK_PLUS_QM", RE_BK_PLUS_QM }, | |
250 | { "CHAR_CLASSES", RE_CHAR_CLASSES }, | |
251 | { "CONTEXT_INDEP_ANCHORS", RE_CONTEXT_INDEP_ANCHORS }, | |
252 | { "CONTEXT_INDEP_OPS", RE_CONTEXT_INDEP_OPS }, | |
253 | { "CONTEXT_INVALID_OPS", RE_CONTEXT_INVALID_OPS }, | |
254 | { "DOT_NEWLINE", RE_DOT_NEWLINE }, | |
255 | { "DOT_NOT_NULL", RE_DOT_NOT_NULL }, | |
256 | { "HAT_LISTS_NOT_NEWLINE", RE_HAT_LISTS_NOT_NEWLINE }, | |
257 | { "INTERVALS", RE_INTERVALS }, | |
258 | { "LIMITED_OPS", RE_LIMITED_OPS }, | |
259 | { "NEWLINE_ALT", RE_NEWLINE_ALT }, | |
260 | { "NO_BK_BRACES", RE_NO_BK_BRACES }, | |
261 | { "NO_BK_PARENS", RE_NO_BK_PARENS }, | |
262 | { "NO_BK_REFS", RE_NO_BK_REFS }, | |
263 | { "NO_BK_VBAR", RE_NO_BK_VBAR }, | |
264 | { "NO_EMPTY_RANGES", RE_NO_EMPTY_RANGES }, | |
265 | { "UNMATCHED_RIGHT_PAREN_ORD", RE_UNMATCHED_RIGHT_PAREN_ORD }, | |
266 | { "NO_POSIX_BACKTRACKING", RE_NO_POSIX_BACKTRACKING }, | |
267 | { "NO_GNU_OPS", RE_NO_GNU_OPS }, | |
268 | { "DEBUG", RE_DEBUG }, | |
269 | { "INVALID_INTERVAL_ORD", RE_INVALID_INTERVAL_ORD }, | |
270 | { "ICASE", RE_ICASE }, | |
271 | { "CARET_ANCHORS_HERE", RE_CARET_ANCHORS_HERE }, | |
272 | { "CONTEXT_INVALID_DUP", RE_CONTEXT_INVALID_DUP }, | |
273 | { "NO_SUB", RE_NO_SUB }, | |
274 | #ifdef RE_PLAIN | |
275 | { "PLAIN", RE_PLAIN }, | |
276 | #endif | |
277 | /*---------------------------------------------------------------------------*/ | |
278 | { NULL, 0 } | |
279 | }; | |
280 | ||
281 | static int Gnu_get_flags (lua_State *L) { | |
282 | const flag_pair* fps[] = { gnu_flags, gnu_syntax_flags, NULL }; | |
283 | return get_flags (L, fps); | |
284 | } | |
285 | ||
286 | static const luaL_Reg r_methods[] = { | |
287 | { "exec", algm_exec }, | |
288 | { "tfind", algm_tfind }, /* old match */ | |
289 | { "find", algm_find }, | |
290 | { "match", algm_match }, | |
291 | { "__gc", Gnu_gc }, | |
292 | { "__tostring", Gnu_tostring }, | |
293 | { NULL, NULL} | |
294 | }; | |
295 | ||
296 | static const luaL_Reg r_functions[] = { | |
297 | { "match", algf_match }, | |
298 | { "find", algf_find }, | |
299 | { "gmatch", algf_gmatch }, | |
300 | { "gsub", algf_gsub }, | |
301 | { "count", algf_count }, | |
302 | { "split", algf_split }, | |
303 | { "new", algf_new }, | |
304 | { "flags", Gnu_get_flags }, | |
305 | { NULL, NULL } | |
306 | }; | |
307 | ||
308 | /* Open the library */ | |
309 | REX_API int REX_OPENLIB (lua_State *L) | |
310 | { | |
311 | alg_register(L, r_methods, r_functions, "GNU regexes"); | |
312 | return 1; | |
313 | } |
0 | /* lonig.c - Lua binding of Oniguruma library */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #include <stdlib.h> | |
4 | #include <string.h> | |
5 | #include <locale.h> | |
6 | #include <ctype.h> | |
7 | #include <oniguruma.h> | |
8 | ||
9 | #include "lua.h" | |
10 | #include "lauxlib.h" | |
11 | #include "../common.h" | |
12 | ||
13 | extern int LOnig_get_flags (lua_State *L); | |
14 | ||
15 | /* These 2 settings may be redefined from the command-line or the makefile. | |
16 | * They should be kept in sync between themselves and with the target name. | |
17 | */ | |
18 | #ifndef REX_LIBNAME | |
19 | # define REX_LIBNAME "rex_onig" | |
20 | #endif | |
21 | #ifndef REX_OPENLIB | |
22 | # define REX_OPENLIB luaopen_rex_onig | |
23 | #endif | |
24 | ||
25 | #define REX_TYPENAME REX_LIBNAME"_regex" | |
26 | ||
27 | #define ALG_CFLAGS_DFLT ONIG_OPTION_NONE | |
28 | #define ALG_EFLAGS_DFLT 0 | |
29 | ||
30 | static int getcflags (lua_State *L, int pos); | |
31 | #define ALG_GETCFLAGS(L,pos) getcflags(L, pos) | |
32 | ||
33 | static void checkarg_compile (lua_State *L, int pos, TArgComp *argC); | |
34 | #define ALG_GETCARGS(a,b,c) checkarg_compile(a,b,c) | |
35 | ||
36 | #define ALG_NOMATCH(res) ((res) == ONIG_MISMATCH) | |
37 | #define ALG_ISMATCH(res) ((res) >= 0) | |
38 | #define ALG_SUBBEG(ud,n) ud->region->beg[n] | |
39 | #define ALG_SUBEND(ud,n) ud->region->end[n] | |
40 | #define ALG_SUBLEN(ud,n) (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n)) | |
41 | #define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0) | |
42 | #define ALG_NSUB(ud) onig_number_of_captures(ud->reg) | |
43 | ||
44 | #define ALG_PUSHSUB(L,ud,text,n) \ | |
45 | lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n)) | |
46 | ||
47 | #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ | |
48 | (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
49 | ||
50 | #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1) | |
51 | #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n)) | |
52 | #define ALG_PUSHOFFSETS(L,ud,offs,n) \ | |
53 | (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n)) | |
54 | ||
55 | #define ALG_BASE(st) 0 | |
56 | #define ALG_PULL | |
57 | ||
58 | typedef struct { | |
59 | regex_t *reg; | |
60 | OnigRegion *region; | |
61 | OnigErrorInfo einfo; | |
62 | } TOnig; | |
63 | ||
64 | #define TUserdata TOnig | |
65 | ||
66 | static void do_named_subpatterns (lua_State *L, TOnig *ud, const char *text); | |
67 | # define DO_NAMED_SUBPATTERNS do_named_subpatterns | |
68 | ||
69 | #include "../algo.h" | |
70 | ||
71 | #define CUC const unsigned char* | |
72 | ||
73 | /* Functions | |
74 | ****************************************************************************** | |
75 | */ | |
76 | ||
77 | static int getcflags (lua_State *L, int pos) { | |
78 | switch (lua_type (L, pos)) { | |
79 | case LUA_TNONE: | |
80 | case LUA_TNIL: | |
81 | return ALG_CFLAGS_DFLT; | |
82 | case LUA_TNUMBER: | |
83 | return lua_tointeger (L, pos); | |
84 | case LUA_TSTRING: { | |
85 | const char *s = lua_tostring (L, pos); | |
86 | int res = 0, ch; | |
87 | while ((ch = *s++) != '\0') { | |
88 | if (ch == 'i') res |= ONIG_OPTION_IGNORECASE; | |
89 | else if (ch == 'm') res |= ONIG_OPTION_NEGATE_SINGLELINE; | |
90 | else if (ch == 's') res |= ONIG_OPTION_MULTILINE; | |
91 | else if (ch == 'x') res |= ONIG_OPTION_EXTEND; | |
92 | /* else if (ch == 'U') res |= PCRE_UNGREEDY; */ | |
93 | /* else if (ch == 'X') res |= PCRE_EXTRA; */ | |
94 | } | |
95 | return res; | |
96 | } | |
97 | default: | |
98 | return luaL_typerror (L, pos, "number or string"); | |
99 | } | |
100 | } | |
101 | ||
102 | static int generate_error (lua_State *L, const TOnig *ud, int errcode) { | |
103 | char buf [ONIG_MAX_ERROR_MESSAGE_LEN]; | |
104 | onig_error_code_to_str((unsigned char*) buf, errcode, &ud->einfo); | |
105 | return luaL_error(L, buf); | |
106 | } | |
107 | ||
/* One row of a name -> object lookup table (used for encodings and syntaxes). */
typedef struct {
  const char *name;   /* key compared by fcmp */
  void       *value;  /* object associated with that name */
} EncPair;
112 | ||
113 | /* ATTENTION: | |
114 | This array must always be kept alphabetically sorted, as it's used in the | |
115 | binary search, so take care when manually inserting new elements. | |
116 | */ | |
117 | static EncPair Encodings[] = { | |
118 | { "ASCII", ONIG_ENCODING_ASCII }, | |
119 | { "BIG5", ONIG_ENCODING_BIG5 }, | |
120 | { "CP1251", ONIG_ENCODING_CP1251 }, | |
121 | { "EUC_CN", ONIG_ENCODING_EUC_CN }, | |
122 | { "EUC_JP", ONIG_ENCODING_EUC_JP }, | |
123 | { "EUC_KR", ONIG_ENCODING_EUC_KR }, | |
124 | { "EUC_TW", ONIG_ENCODING_EUC_TW }, | |
125 | { "GB18030", ONIG_ENCODING_GB18030 }, | |
126 | { "ISO_8859_1", ONIG_ENCODING_ISO_8859_1 }, | |
127 | { "ISO_8859_10", ONIG_ENCODING_ISO_8859_10 }, | |
128 | { "ISO_8859_11", ONIG_ENCODING_ISO_8859_11 }, | |
129 | { "ISO_8859_13", ONIG_ENCODING_ISO_8859_13 }, | |
130 | { "ISO_8859_14", ONIG_ENCODING_ISO_8859_14 }, | |
131 | { "ISO_8859_15", ONIG_ENCODING_ISO_8859_15 }, | |
132 | { "ISO_8859_16", ONIG_ENCODING_ISO_8859_16 }, | |
133 | { "ISO_8859_2", ONIG_ENCODING_ISO_8859_2 }, | |
134 | { "ISO_8859_3", ONIG_ENCODING_ISO_8859_3 }, | |
135 | { "ISO_8859_4", ONIG_ENCODING_ISO_8859_4 }, | |
136 | { "ISO_8859_5", ONIG_ENCODING_ISO_8859_5 }, | |
137 | { "ISO_8859_6", ONIG_ENCODING_ISO_8859_6 }, | |
138 | { "ISO_8859_7", ONIG_ENCODING_ISO_8859_7 }, | |
139 | { "ISO_8859_8", ONIG_ENCODING_ISO_8859_8 }, | |
140 | { "ISO_8859_9", ONIG_ENCODING_ISO_8859_9 }, | |
141 | /*{ "KOI8", ONIG_ENCODING_KOI8 }, */ | |
142 | { "KOI8_R", ONIG_ENCODING_KOI8_R }, | |
143 | { "SJIS", ONIG_ENCODING_SJIS }, | |
144 | { "UNDEF", ONIG_ENCODING_UNDEF }, | |
145 | { "UTF16_BE", ONIG_ENCODING_UTF16_BE }, | |
146 | { "UTF16_LE", ONIG_ENCODING_UTF16_LE }, | |
147 | { "UTF32_BE", ONIG_ENCODING_UTF32_BE }, | |
148 | { "UTF32_LE", ONIG_ENCODING_UTF32_LE }, | |
149 | { "UTF8", ONIG_ENCODING_UTF8 }, | |
150 | }; | |
151 | ||
152 | /* ATTENTION: | |
153 | This array must always be kept alphabetically sorted, as it's used in the | |
154 | binary search, so take care when manually inserting new elements. | |
155 | */ | |
156 | static EncPair Syntaxes[] = { | |
157 | { "ASIS", ONIG_SYNTAX_ASIS }, | |
158 | /*{ "DEFAULT", ONIG_SYNTAX_DEFAULT }, */ | |
159 | { "EMACS", ONIG_SYNTAX_EMACS }, | |
160 | { "GNU_REGEX", ONIG_SYNTAX_GNU_REGEX }, | |
161 | { "GREP", ONIG_SYNTAX_GREP }, | |
162 | { "JAVA", ONIG_SYNTAX_JAVA }, | |
163 | { "PERL", ONIG_SYNTAX_PERL }, | |
164 | { "PERL_NG", ONIG_SYNTAX_PERL_NG }, | |
165 | { "POSIX_BASIC", ONIG_SYNTAX_POSIX_BASIC }, | |
166 | { "POSIX_EXTENDED", ONIG_SYNTAX_POSIX_EXTENDED }, | |
167 | { "RUBY", ONIG_SYNTAX_RUBY }, | |
168 | }; | |
169 | ||
170 | static int fcmp(const void *p1, const void *p2) { | |
171 | return strcmp(((EncPair*)p1)->name, ((EncPair*)p2)->name); | |
172 | } | |
173 | ||
174 | static const char *getlocale (lua_State *L, int pos) { | |
175 | EncPair key, *found; | |
176 | if ((key.name = luaL_optstring(L, pos, NULL)) == NULL) | |
177 | return (const char*)ONIG_ENCODING_ASCII; | |
178 | found = (EncPair*) bsearch(&key, Encodings, sizeof(Encodings)/sizeof(EncPair), | |
179 | sizeof(EncPair), fcmp); | |
180 | if (found == NULL) | |
181 | luaL_argerror(L, pos, "invalid or unsupported encoding string"); | |
182 | return (const char*)found->value; | |
183 | } | |
184 | ||
185 | static void *getsyntax (lua_State *L, int pos) { | |
186 | EncPair key, *found; | |
187 | if ((key.name = luaL_optstring(L, pos, NULL)) == NULL) | |
188 | return ONIG_SYNTAX_DEFAULT; | |
189 | found = (EncPair*) bsearch(&key, Syntaxes, sizeof(Syntaxes)/sizeof(EncPair), | |
190 | sizeof(EncPair), fcmp); | |
191 | if (found == NULL) | |
192 | luaL_argerror(L, pos, "invalid or unsupported syntax string"); | |
193 | return found->value; | |
194 | } | |
195 | ||
196 | static void checkarg_compile (lua_State *L, int pos, TArgComp *argC) { | |
197 | argC->locale = getlocale (L, pos); | |
198 | argC->syntax = getsyntax (L, pos + 1); | |
199 | } | |
200 | ||
201 | /* | |
202 | rex.setdefaultsyntax (syntax) | |
203 | @param syntax: one of the predefined strings listed in array 'Syntaxes' | |
204 | @return: nothing | |
205 | */ | |
206 | static int LOnig_setdefaultsyntax (lua_State *L) { | |
207 | (void)luaL_checkstring(L, 1); | |
208 | onig_set_default_syntax((OnigSyntaxType*) getsyntax(L, 1)); | |
209 | return 0; | |
210 | } | |
211 | ||
212 | static int compile_regex (lua_State *L, const TArgComp *argC, TOnig **pud) { | |
213 | TOnig *ud; | |
214 | int r; | |
215 | ||
216 | ud = (TOnig*)lua_newuserdata (L, sizeof (TOnig)); | |
217 | memset (ud, 0, sizeof (TOnig)); /* initialize all members to 0 */ | |
218 | lua_pushvalue (L, ALG_ENVIRONINDEX); | |
219 | lua_setmetatable (L, -2); | |
220 | ||
221 | r = onig_new(&ud->reg, (CUC)argC->pattern, (CUC)argC->pattern + argC->patlen, | |
222 | argC->cflags, (OnigEncoding)argC->locale, (OnigSyntaxType*)argC->syntax, | |
223 | &ud->einfo); | |
224 | if (r != ONIG_NORMAL) | |
225 | return generate_error(L, ud, r); | |
226 | ||
227 | if ((ud->region = onig_region_new()) == NULL) | |
228 | return luaL_error(L, "`onig_region_new' failed"); | |
229 | ||
230 | if (pud) *pud = ud; | |
231 | return 1; | |
232 | } | |
233 | ||
234 | typedef struct { | |
235 | lua_State *L; | |
236 | TOnig *ud; | |
237 | const char *text; | |
238 | } TNameArg; | |
239 | ||
240 | static int name_callback (const UChar *name, const UChar *name_end, | |
241 | int ngroups, int *groupnumlist, regex_t *reg, void *arg) | |
242 | { | |
243 | (void) ngroups; | |
244 | (void) groupnumlist; | |
245 | TNameArg *A = (TNameArg*)arg; | |
246 | int num = onig_name_to_backref_number(reg, name, name_end, A->ud->region); | |
247 | lua_pushlstring (A->L, (const char*)name, name_end - name); | |
248 | ALG_PUSHSUB_OR_FALSE (A->L, A->ud, A->text, num); | |
249 | lua_rawset (A->L, -3); | |
250 | return 0; | |
251 | } | |
252 | ||
253 | /* the target table must be on lua stack top */ | |
254 | static void do_named_subpatterns (lua_State *L, TOnig *ud, const char *text) { | |
255 | if (onig_number_of_names (ud->reg) > 0) { | |
256 | TNameArg A = { L, ud, text }; | |
257 | onig_foreach_name(ud->reg, name_callback, &A); | |
258 | } | |
259 | } | |
260 | ||
261 | static int findmatch_exec (TUserdata *ud, TArgExec *argE) { | |
262 | const char *end = argE->text + argE->textlen; | |
263 | onig_region_clear(ud->region); | |
264 | return onig_search (ud->reg, (CUC)argE->text, (CUC)end, | |
265 | (CUC)argE->text + argE->startoffset, (CUC)end, | |
266 | ud->region, argE->eflags); | |
267 | } | |
268 | ||
269 | static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { | |
270 | lua_pushlstring (L, argE->text, argE->textlen); | |
271 | } | |
272 | ||
273 | static int gmatch_exec (TOnig *ud, TArgExec *argE) { | |
274 | return findmatch_exec(ud, argE); | |
275 | } | |
276 | ||
277 | static int gsub_exec (TOnig *ud, TArgExec *argE, int st) { | |
278 | const char *end = argE->text + argE->textlen; | |
279 | onig_region_clear(ud->region); | |
280 | return onig_search (ud->reg, (CUC)argE->text, (CUC)end, (CUC)argE->text + st, | |
281 | (CUC)end, ud->region, argE->eflags); | |
282 | } | |
283 | ||
284 | static int split_exec (TOnig *ud, TArgExec *argE, int st) { | |
285 | return gsub_exec(ud, argE, st); | |
286 | } | |
287 | ||
288 | static int LOnig_capturecount (lua_State *L) { | |
289 | TOnig *ud = check_ud(L); | |
290 | lua_pushinteger(L, onig_number_of_captures(ud->reg)); | |
291 | return 1; | |
292 | } | |
293 | ||
294 | static int LOnig_gc (lua_State *L) { | |
295 | TOnig *ud = check_ud (L); | |
296 | if (ud->reg) { /* precaution against "manual" __gc calling */ | |
297 | onig_free (ud->reg); | |
298 | ud->reg = NULL; | |
299 | } | |
300 | if (ud->region) { | |
301 | onig_region_free (ud->region, 1); | |
302 | ud->region = NULL; | |
303 | } | |
304 | return 0; | |
305 | } | |
306 | ||
307 | static int LOnig_tostring (lua_State *L) { | |
308 | TOnig *ud = check_ud (L); | |
309 | if (ud->reg) | |
310 | lua_pushfstring (L, "%s (%p)", REX_TYPENAME, (void*)ud); | |
311 | else | |
312 | lua_pushfstring (L, "%s (deleted)", REX_TYPENAME); | |
313 | return 1; | |
314 | } | |
315 | ||
316 | static int LOnig_version (lua_State *L) { | |
317 | lua_pushstring (L, onig_version ()); | |
318 | return 1; | |
319 | } | |
320 | ||
321 | static int LOnig_internal_test (lua_State *L) { | |
322 | unsigned int i; | |
323 | for (i=1; i<sizeof(Encodings)/sizeof(Encodings[0]); i++) { | |
324 | if (fcmp(&Encodings[i-1], &Encodings[i]) >= 0) { | |
325 | lua_pushboolean(L, 0); | |
326 | lua_pushstring(L, "Array 'Encodings' is not properly sorted."); | |
327 | return 2; | |
328 | } | |
329 | } | |
330 | for (i=1; i<sizeof(Syntaxes)/sizeof(Syntaxes[0]); i++) { | |
331 | if (fcmp(&Syntaxes[i-1], &Syntaxes[i]) >= 0) { | |
332 | lua_pushboolean(L, 0); | |
333 | lua_pushstring(L, "Array 'Syntaxes' is not properly sorted."); | |
334 | return 2; | |
335 | } | |
336 | } | |
337 | lua_pushboolean(L, 1); | |
338 | return 1; | |
339 | } | |
340 | ||
341 | static const luaL_Reg r_methods[] = { | |
342 | { "exec", algm_exec }, | |
343 | { "tfind", algm_tfind }, /* old name: match */ | |
344 | { "find", algm_find }, | |
345 | { "match", algm_match }, | |
346 | { "capturecount", LOnig_capturecount }, | |
347 | { "__gc", LOnig_gc }, | |
348 | { "__tostring", LOnig_tostring }, | |
349 | { NULL, NULL } | |
350 | }; | |
351 | ||
352 | static const luaL_Reg r_functions[] = { | |
353 | { "match", algf_match }, | |
354 | { "find", algf_find }, | |
355 | { "gmatch", algf_gmatch }, | |
356 | { "gsub", algf_gsub }, | |
357 | { "count", algf_count }, | |
358 | { "split", algf_split }, | |
359 | { "new", algf_new }, | |
360 | { "flags", LOnig_get_flags }, | |
361 | { "version", LOnig_version }, | |
362 | { "setdefaultsyntax", LOnig_setdefaultsyntax }, | |
363 | { "internal_test", LOnig_internal_test }, | |
364 | { NULL, NULL } | |
365 | }; | |
366 | ||
367 | /* Open the library */ | |
368 | REX_API int REX_OPENLIB (lua_State *L) { | |
369 | if (ONIGURUMA_VERSION_MAJOR > atoi (onig_version ())) { | |
370 | return luaL_error (L, "%s requires at least version %d of Oniguruma library", | |
371 | REX_LIBNAME, (int)ONIGURUMA_VERSION_MAJOR); | |
372 | } | |
373 | onig_init(); | |
374 | onig_set_default_syntax(ONIG_SYNTAX_RUBY); | |
375 | alg_register(L, r_methods, r_functions, "Oniguruma"); | |
376 | return 1; | |
377 | } |
0 | /* lonig_f.c - Lua binding of Oniguruma library */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #include <oniguruma.h> | |
4 | #include <lua.h> | |
5 | #include <lauxlib.h> | |
6 | #include "../common.h" | |
7 | ||
8 | static flag_pair onig_flags[] = { | |
9 | #ifdef ONIG_INFINITE_DISTANCE | |
10 | { "ONIG_INFINITE_DISTANCE", ONIG_INFINITE_DISTANCE }, | |
11 | #endif | |
12 | #ifdef ONIG_NREGION | |
13 | { "ONIG_NREGION", ONIG_NREGION }, | |
14 | #endif | |
15 | #ifdef ONIG_MAX_BACKREF_NUM | |
16 | { "ONIG_MAX_BACKREF_NUM", ONIG_MAX_BACKREF_NUM }, | |
17 | #endif | |
18 | #ifdef ONIG_MAX_REPEAT_NUM | |
19 | { "ONIG_MAX_REPEAT_NUM", ONIG_MAX_REPEAT_NUM }, | |
20 | #endif | |
21 | #ifdef ONIG_MAX_MULTI_BYTE_RANGES_NUM | |
22 | { "ONIG_MAX_MULTI_BYTE_RANGES_NUM", ONIG_MAX_MULTI_BYTE_RANGES_NUM }, | |
23 | #endif | |
24 | #ifdef ONIG_MAX_ERROR_MESSAGE_LEN | |
25 | { "ONIG_MAX_ERROR_MESSAGE_LEN", ONIG_MAX_ERROR_MESSAGE_LEN }, | |
26 | #endif | |
27 | #ifdef ONIG_OPTION_DEFAULT | |
28 | { "ONIG_OPTION_DEFAULT", ONIG_OPTION_DEFAULT }, | |
29 | { "DEFAULT", ONIG_OPTION_DEFAULT }, | |
30 | #endif | |
31 | #ifdef ONIG_OPTION_NONE | |
32 | { "ONIG_OPTION_NONE", ONIG_OPTION_NONE }, | |
33 | { "NONE", ONIG_OPTION_NONE }, | |
34 | #endif | |
35 | #ifdef ONIG_OPTION_IGNORECASE | |
36 | { "ONIG_OPTION_IGNORECASE", ONIG_OPTION_IGNORECASE }, | |
37 | { "IGNORECASE", ONIG_OPTION_IGNORECASE }, | |
38 | #endif | |
39 | #ifdef ONIG_OPTION_EXTEND | |
40 | { "ONIG_OPTION_EXTEND", ONIG_OPTION_EXTEND }, | |
41 | { "EXTEND", ONIG_OPTION_EXTEND }, | |
42 | #endif | |
43 | #ifdef ONIG_OPTION_MULTILINE | |
44 | { "ONIG_OPTION_MULTILINE", ONIG_OPTION_MULTILINE }, | |
45 | { "MULTILINE", ONIG_OPTION_MULTILINE }, | |
46 | #endif | |
47 | #ifdef ONIG_OPTION_SINGLELINE | |
48 | { "ONIG_OPTION_SINGLELINE", ONIG_OPTION_SINGLELINE }, | |
49 | { "SINGLELINE", ONIG_OPTION_SINGLELINE }, | |
50 | #endif | |
51 | #ifdef ONIG_OPTION_FIND_LONGEST | |
52 | { "ONIG_OPTION_FIND_LONGEST", ONIG_OPTION_FIND_LONGEST }, | |
53 | { "FIND_LONGEST", ONIG_OPTION_FIND_LONGEST }, | |
54 | #endif | |
55 | #ifdef ONIG_OPTION_FIND_NOT_EMPTY | |
56 | { "ONIG_OPTION_FIND_NOT_EMPTY", ONIG_OPTION_FIND_NOT_EMPTY }, | |
57 | { "FIND_NOT_EMPTY", ONIG_OPTION_FIND_NOT_EMPTY }, | |
58 | #endif | |
59 | #ifdef ONIG_OPTION_NEGATE_SINGLELINE | |
60 | { "ONIG_OPTION_NEGATE_SINGLELINE", ONIG_OPTION_NEGATE_SINGLELINE }, | |
61 | { "NEGATE_SINGLELINE", ONIG_OPTION_NEGATE_SINGLELINE }, | |
62 | #endif | |
63 | #ifdef ONIG_OPTION_DONT_CAPTURE_GROUP | |
64 | { "ONIG_OPTION_DONT_CAPTURE_GROUP", ONIG_OPTION_DONT_CAPTURE_GROUP }, | |
65 | { "DONT_CAPTURE_GROUP", ONIG_OPTION_DONT_CAPTURE_GROUP }, | |
66 | #endif | |
67 | #ifdef ONIG_OPTION_CAPTURE_GROUP | |
68 | { "ONIG_OPTION_CAPTURE_GROUP", ONIG_OPTION_CAPTURE_GROUP }, | |
69 | { "CAPTURE_GROUP", ONIG_OPTION_CAPTURE_GROUP }, | |
70 | #endif | |
71 | #ifdef ONIG_OPTION_NOTBOL | |
72 | { "ONIG_OPTION_NOTBOL", ONIG_OPTION_NOTBOL }, | |
73 | { "NOTBOL", ONIG_OPTION_NOTBOL }, | |
74 | #endif | |
75 | #ifdef ONIG_OPTION_NOTEOL | |
76 | { "ONIG_OPTION_NOTEOL", ONIG_OPTION_NOTEOL }, | |
77 | { "NOTEOL", ONIG_OPTION_NOTEOL }, | |
78 | #endif | |
79 | #ifdef ONIG_OPTION_POSIX_REGION | |
80 | { "ONIG_OPTION_POSIX_REGION", ONIG_OPTION_POSIX_REGION }, | |
81 | { "POSIX_REGION", ONIG_OPTION_POSIX_REGION }, | |
82 | #endif | |
83 | #ifdef ONIG_OPTION_MAXBIT | |
84 | { "ONIG_OPTION_MAXBIT", ONIG_OPTION_MAXBIT }, | |
85 | { "MAXBIT", ONIG_OPTION_MAXBIT }, | |
86 | #endif | |
87 | #ifdef ONIG_SYN_OP_VARIABLE_META_CHARACTERS | |
88 | { "ONIG_SYN_OP_VARIABLE_META_CHARACTERS", ONIG_SYN_OP_VARIABLE_META_CHARACTERS }, | |
89 | #endif | |
90 | #ifdef ONIG_SYN_OP_DOT_ANYCHAR | |
91 | { "ONIG_SYN_OP_DOT_ANYCHAR", ONIG_SYN_OP_DOT_ANYCHAR }, | |
92 | #endif | |
93 | #ifdef ONIG_SYN_OP_ASTERISK_ZERO_INF | |
94 | { "ONIG_SYN_OP_ASTERISK_ZERO_INF", ONIG_SYN_OP_ASTERISK_ZERO_INF }, | |
95 | #endif | |
96 | #ifdef ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF | |
97 | { "ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF", ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF }, | |
98 | #endif | |
99 | #ifdef ONIG_SYN_OP_PLUS_ONE_INF | |
100 | { "ONIG_SYN_OP_PLUS_ONE_INF", ONIG_SYN_OP_PLUS_ONE_INF }, | |
101 | #endif | |
102 | #ifdef ONIG_SYN_OP_ESC_PLUS_ONE_INF | |
103 | { "ONIG_SYN_OP_ESC_PLUS_ONE_INF", ONIG_SYN_OP_ESC_PLUS_ONE_INF }, | |
104 | #endif | |
105 | #ifdef ONIG_SYN_OP_QMARK_ZERO_ONE | |
106 | { "ONIG_SYN_OP_QMARK_ZERO_ONE", ONIG_SYN_OP_QMARK_ZERO_ONE }, | |
107 | #endif | |
108 | #ifdef ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | |
109 | { "ONIG_SYN_OP_ESC_QMARK_ZERO_ONE", ONIG_SYN_OP_ESC_QMARK_ZERO_ONE }, | |
110 | #endif | |
111 | #ifdef ONIG_SYN_OP_BRACE_INTERVAL | |
112 | { "ONIG_SYN_OP_BRACE_INTERVAL", ONIG_SYN_OP_BRACE_INTERVAL }, | |
113 | #endif | |
114 | #ifdef ONIG_SYN_OP_ESC_BRACE_INTERVAL | |
115 | { "ONIG_SYN_OP_ESC_BRACE_INTERVAL", ONIG_SYN_OP_ESC_BRACE_INTERVAL }, | |
116 | #endif | |
117 | #ifdef ONIG_SYN_OP_VBAR_ALT | |
118 | { "ONIG_SYN_OP_VBAR_ALT", ONIG_SYN_OP_VBAR_ALT }, | |
119 | #endif | |
120 | #ifdef ONIG_SYN_OP_ESC_VBAR_ALT | |
121 | { "ONIG_SYN_OP_ESC_VBAR_ALT", ONIG_SYN_OP_ESC_VBAR_ALT }, | |
122 | #endif | |
123 | #ifdef ONIG_SYN_OP_LPAREN_SUBEXP | |
124 | { "ONIG_SYN_OP_LPAREN_SUBEXP", ONIG_SYN_OP_LPAREN_SUBEXP }, | |
125 | #endif | |
126 | #ifdef ONIG_SYN_OP_ESC_LPAREN_SUBEXP | |
127 | { "ONIG_SYN_OP_ESC_LPAREN_SUBEXP", ONIG_SYN_OP_ESC_LPAREN_SUBEXP }, | |
128 | #endif | |
129 | #ifdef ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | |
130 | { "ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR", ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR }, | |
131 | #endif | |
132 | #ifdef ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | |
133 | { "ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR", ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR }, | |
134 | #endif | |
135 | #ifdef ONIG_SYN_OP_DECIMAL_BACKREF | |
136 | { "ONIG_SYN_OP_DECIMAL_BACKREF", ONIG_SYN_OP_DECIMAL_BACKREF }, | |
137 | #endif | |
138 | #ifdef ONIG_SYN_OP_BRACKET_CC | |
139 | { "ONIG_SYN_OP_BRACKET_CC", ONIG_SYN_OP_BRACKET_CC }, | |
140 | #endif | |
141 | #ifdef ONIG_SYN_OP_ESC_W_WORD | |
142 | { "ONIG_SYN_OP_ESC_W_WORD", ONIG_SYN_OP_ESC_W_WORD }, | |
143 | #endif | |
144 | #ifdef ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | |
145 | { "ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END", ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END }, | |
146 | #endif | |
147 | #ifdef ONIG_SYN_OP_ESC_B_WORD_BOUND | |
148 | { "ONIG_SYN_OP_ESC_B_WORD_BOUND", ONIG_SYN_OP_ESC_B_WORD_BOUND }, | |
149 | #endif | |
150 | #ifdef ONIG_SYN_OP_ESC_S_WHITE_SPACE | |
151 | { "ONIG_SYN_OP_ESC_S_WHITE_SPACE", ONIG_SYN_OP_ESC_S_WHITE_SPACE }, | |
152 | #endif | |
153 | #ifdef ONIG_SYN_OP_ESC_D_DIGIT | |
154 | { "ONIG_SYN_OP_ESC_D_DIGIT", ONIG_SYN_OP_ESC_D_DIGIT }, | |
155 | #endif | |
156 | #ifdef ONIG_SYN_OP_LINE_ANCHOR | |
157 | { "ONIG_SYN_OP_LINE_ANCHOR", ONIG_SYN_OP_LINE_ANCHOR }, | |
158 | #endif | |
159 | #ifdef ONIG_SYN_OP_POSIX_BRACKET | |
160 | { "ONIG_SYN_OP_POSIX_BRACKET", ONIG_SYN_OP_POSIX_BRACKET }, | |
161 | #endif | |
162 | #ifdef ONIG_SYN_OP_QMARK_NON_GREEDY | |
163 | { "ONIG_SYN_OP_QMARK_NON_GREEDY", ONIG_SYN_OP_QMARK_NON_GREEDY }, | |
164 | #endif | |
165 | #ifdef ONIG_SYN_OP_ESC_CONTROL_CHARS | |
166 | { "ONIG_SYN_OP_ESC_CONTROL_CHARS", ONIG_SYN_OP_ESC_CONTROL_CHARS }, | |
167 | #endif | |
168 | #ifdef ONIG_SYN_OP_ESC_C_CONTROL | |
169 | { "ONIG_SYN_OP_ESC_C_CONTROL", ONIG_SYN_OP_ESC_C_CONTROL }, | |
170 | #endif | |
171 | #ifdef ONIG_SYN_OP_ESC_OCTAL3 | |
172 | { "ONIG_SYN_OP_ESC_OCTAL3", ONIG_SYN_OP_ESC_OCTAL3 }, | |
173 | #endif | |
174 | #ifdef ONIG_SYN_OP_ESC_X_HEX2 | |
175 | { "ONIG_SYN_OP_ESC_X_HEX2", ONIG_SYN_OP_ESC_X_HEX2 }, | |
176 | #endif | |
177 | #ifdef ONIG_SYN_OP_ESC_X_BRACE_HEX8 | |
178 | { "ONIG_SYN_OP_ESC_X_BRACE_HEX8", ONIG_SYN_OP_ESC_X_BRACE_HEX8 }, | |
179 | #endif | |
180 | #ifdef ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | |
181 | { "ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE", ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE }, | |
182 | #endif | |
183 | #ifdef ONIG_SYN_OP2_QMARK_GROUP_EFFECT | |
184 | { "ONIG_SYN_OP2_QMARK_GROUP_EFFECT", ONIG_SYN_OP2_QMARK_GROUP_EFFECT }, | |
185 | #endif | |
186 | #ifdef ONIG_SYN_OP2_OPTION_PERL | |
187 | { "ONIG_SYN_OP2_OPTION_PERL", ONIG_SYN_OP2_OPTION_PERL }, | |
188 | #endif | |
189 | #ifdef ONIG_SYN_OP2_OPTION_RUBY | |
190 | { "ONIG_SYN_OP2_OPTION_RUBY", ONIG_SYN_OP2_OPTION_RUBY }, | |
191 | #endif | |
192 | #ifdef ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | |
193 | { "ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT", ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT }, | |
194 | #endif | |
195 | #ifdef ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | |
196 | { "ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL", ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL }, | |
197 | #endif | |
198 | #ifdef ONIG_SYN_OP2_CCLASS_SET_OP | |
199 | { "ONIG_SYN_OP2_CCLASS_SET_OP", ONIG_SYN_OP2_CCLASS_SET_OP }, | |
200 | #endif | |
201 | #ifdef ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | |
202 | { "ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP", ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP }, | |
203 | #endif | |
204 | #ifdef ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | |
205 | { "ONIG_SYN_OP2_ESC_K_NAMED_BACKREF", ONIG_SYN_OP2_ESC_K_NAMED_BACKREF }, | |
206 | #endif | |
207 | #ifdef ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | |
208 | { "ONIG_SYN_OP2_ESC_G_SUBEXP_CALL", ONIG_SYN_OP2_ESC_G_SUBEXP_CALL }, | |
209 | #endif | |
210 | #ifdef ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY | |
211 | { "ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY", ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY }, | |
212 | #endif | |
213 | #ifdef ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | |
214 | { "ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL", ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL }, | |
215 | #endif | |
216 | #ifdef ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | |
217 | { "ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META", ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META }, | |
218 | #endif | |
219 | #ifdef ONIG_SYN_OP2_ESC_V_VTAB | |
220 | { "ONIG_SYN_OP2_ESC_V_VTAB", ONIG_SYN_OP2_ESC_V_VTAB }, | |
221 | #endif | |
222 | #ifdef ONIG_SYN_OP2_ESC_U_HEX4 | |
223 | { "ONIG_SYN_OP2_ESC_U_HEX4", ONIG_SYN_OP2_ESC_U_HEX4 }, | |
224 | #endif | |
225 | #ifdef ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR | |
226 | { "ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR", ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR }, | |
227 | #endif | |
228 | #ifdef ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | |
229 | { "ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY", ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY }, | |
230 | #endif | |
231 | #ifdef ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | |
232 | { "ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT", ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT }, | |
233 | #endif | |
234 | #ifdef ONIG_SYN_OP2_ESC_H_XDIGIT | |
235 | { "ONIG_SYN_OP2_ESC_H_XDIGIT", ONIG_SYN_OP2_ESC_H_XDIGIT }, | |
236 | #endif | |
237 | #ifdef ONIG_SYN_OP2_INEFFECTIVE_ESCAPE | |
238 | { "ONIG_SYN_OP2_INEFFECTIVE_ESCAPE", ONIG_SYN_OP2_INEFFECTIVE_ESCAPE }, | |
239 | #endif | |
240 | #ifdef ONIG_SYN_CONTEXT_INDEP_ANCHORS | |
241 | { "ONIG_SYN_CONTEXT_INDEP_ANCHORS", ONIG_SYN_CONTEXT_INDEP_ANCHORS }, | |
242 | #endif | |
243 | #ifdef ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | |
244 | { "ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS", ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS }, | |
245 | #endif | |
246 | #ifdef ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | |
247 | { "ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS", ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS }, | |
248 | #endif | |
249 | #ifdef ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP | |
250 | { "ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP", ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP }, | |
251 | #endif | |
252 | #ifdef ONIG_SYN_ALLOW_INVALID_INTERVAL | |
253 | { "ONIG_SYN_ALLOW_INVALID_INTERVAL", ONIG_SYN_ALLOW_INVALID_INTERVAL }, | |
254 | #endif | |
255 | #ifdef ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | |
256 | { "ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV", ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV }, | |
257 | #endif | |
258 | #ifdef ONIG_SYN_STRICT_CHECK_BACKREF | |
259 | { "ONIG_SYN_STRICT_CHECK_BACKREF", ONIG_SYN_STRICT_CHECK_BACKREF }, | |
260 | #endif | |
261 | #ifdef ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | |
262 | { "ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND", ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND }, | |
263 | #endif | |
264 | #ifdef ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | |
265 | { "ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP", ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP }, | |
266 | #endif | |
267 | #ifdef ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | |
268 | { "ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME", ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME }, | |
269 | #endif | |
270 | #ifdef ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | |
271 | { "ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY", ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY }, | |
272 | #endif | |
273 | #ifdef ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC | |
274 | { "ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC", ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC }, | |
275 | #endif | |
276 | #ifdef ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | |
277 | { "ONIG_SYN_BACKSLASH_ESCAPE_IN_CC", ONIG_SYN_BACKSLASH_ESCAPE_IN_CC }, | |
278 | #endif | |
279 | #ifdef ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | |
280 | { "ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC", ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC }, | |
281 | #endif | |
282 | #ifdef ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC | |
283 | { "ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC", ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC }, | |
284 | #endif | |
285 | #ifdef ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | |
286 | { "ONIG_SYN_WARN_CC_OP_NOT_ESCAPED", ONIG_SYN_WARN_CC_OP_NOT_ESCAPED }, | |
287 | #endif | |
288 | #ifdef ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT | |
289 | { "ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT", ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT }, | |
290 | #endif | |
291 | #ifdef ONIG_META_CHAR_ESCAPE | |
292 | { "ONIG_META_CHAR_ESCAPE", ONIG_META_CHAR_ESCAPE }, | |
293 | #endif | |
294 | #ifdef ONIG_META_CHAR_ANYCHAR | |
295 | { "ONIG_META_CHAR_ANYCHAR", ONIG_META_CHAR_ANYCHAR }, | |
296 | #endif | |
297 | #ifdef ONIG_META_CHAR_ANYTIME | |
298 | { "ONIG_META_CHAR_ANYTIME", ONIG_META_CHAR_ANYTIME }, | |
299 | #endif | |
300 | #ifdef ONIG_META_CHAR_ZERO_OR_ONE_TIME | |
301 | { "ONIG_META_CHAR_ZERO_OR_ONE_TIME", ONIG_META_CHAR_ZERO_OR_ONE_TIME }, | |
302 | #endif | |
303 | #ifdef ONIG_META_CHAR_ONE_OR_MORE_TIME | |
304 | { "ONIG_META_CHAR_ONE_OR_MORE_TIME", ONIG_META_CHAR_ONE_OR_MORE_TIME }, | |
305 | #endif | |
306 | #ifdef ONIG_META_CHAR_ANYCHAR_ANYTIME | |
307 | { "ONIG_META_CHAR_ANYCHAR_ANYTIME", ONIG_META_CHAR_ANYCHAR_ANYTIME }, | |
308 | #endif | |
309 | #ifdef ONIG_INEFFECTIVE_META_CHAR | |
310 | { "ONIG_INEFFECTIVE_META_CHAR", ONIG_INEFFECTIVE_META_CHAR }, | |
311 | #endif | |
312 | #ifdef ONIG_NORMAL | |
313 | { "ONIG_NORMAL", ONIG_NORMAL }, | |
314 | #endif | |
315 | #ifdef ONIG_MISMATCH | |
316 | { "ONIG_MISMATCH", ONIG_MISMATCH }, | |
317 | #endif | |
318 | #ifdef ONIG_NO_SUPPORT_CONFIG | |
319 | { "ONIG_NO_SUPPORT_CONFIG", ONIG_NO_SUPPORT_CONFIG }, | |
320 | #endif | |
321 | #ifdef ONIG_MAX_CAPTURE_HISTORY_GROUP | |
322 | { "ONIG_MAX_CAPTURE_HISTORY_GROUP", ONIG_MAX_CAPTURE_HISTORY_GROUP }, | |
323 | #endif | |
324 | #ifdef ONIG_TRAVERSE_CALLBACK_AT_FIRST | |
325 | { "ONIG_TRAVERSE_CALLBACK_AT_FIRST", ONIG_TRAVERSE_CALLBACK_AT_FIRST }, | |
326 | #endif | |
327 | #ifdef ONIG_TRAVERSE_CALLBACK_AT_LAST | |
328 | { "ONIG_TRAVERSE_CALLBACK_AT_LAST", ONIG_TRAVERSE_CALLBACK_AT_LAST }, | |
329 | #endif | |
330 | #ifdef ONIG_TRAVERSE_CALLBACK_AT_BOTH | |
331 | { "ONIG_TRAVERSE_CALLBACK_AT_BOTH", ONIG_TRAVERSE_CALLBACK_AT_BOTH }, | |
332 | #endif | |
333 | #ifdef ONIG_REGION_NOTPOS | |
334 | { "ONIG_REGION_NOTPOS", ONIG_REGION_NOTPOS }, | |
335 | #endif | |
336 | #ifdef ONIG_CHAR_TABLE_SIZE | |
337 | { "ONIG_CHAR_TABLE_SIZE", ONIG_CHAR_TABLE_SIZE }, | |
338 | #endif | |
339 | #ifdef ONIG_STATE_NORMAL | |
340 | { "ONIG_STATE_NORMAL", ONIG_STATE_NORMAL }, | |
341 | #endif | |
342 | #ifdef ONIG_STATE_SEARCHING | |
343 | { "ONIG_STATE_SEARCHING", ONIG_STATE_SEARCHING }, | |
344 | #endif | |
345 | #ifdef ONIG_STATE_COMPILING | |
346 | { "ONIG_STATE_COMPILING", ONIG_STATE_COMPILING }, | |
347 | #endif | |
348 | #ifdef ONIG_STATE_MODIFY | |
349 | { "ONIG_STATE_MODIFY", ONIG_STATE_MODIFY }, | |
350 | #endif | |
351 | { NULL, 0 } | |
352 | }; | |
353 | ||
354 | static flag_pair onig_error_flags[] = { | |
355 | #ifdef ONIGERR_MEMORY | |
356 | { "ONIGERR_MEMORY", ONIGERR_MEMORY }, | |
357 | #endif | |
358 | #ifdef ONIGERR_TYPE_BUG | |
359 | { "ONIGERR_TYPE_BUG", ONIGERR_TYPE_BUG }, | |
360 | #endif | |
361 | #ifdef ONIGERR_PARSER_BUG | |
362 | { "ONIGERR_PARSER_BUG", ONIGERR_PARSER_BUG }, | |
363 | #endif | |
364 | #ifdef ONIGERR_STACK_BUG | |
365 | { "ONIGERR_STACK_BUG", ONIGERR_STACK_BUG }, | |
366 | #endif | |
367 | #ifdef ONIGERR_UNDEFINED_BYTECODE | |
368 | { "ONIGERR_UNDEFINED_BYTECODE", ONIGERR_UNDEFINED_BYTECODE }, | |
369 | #endif | |
370 | #ifdef ONIGERR_UNEXPECTED_BYTECODE | |
371 | { "ONIGERR_UNEXPECTED_BYTECODE", ONIGERR_UNEXPECTED_BYTECODE }, | |
372 | #endif | |
373 | #ifdef ONIGERR_MATCH_STACK_LIMIT_OVER | |
374 | { "ONIGERR_MATCH_STACK_LIMIT_OVER", ONIGERR_MATCH_STACK_LIMIT_OVER }, | |
375 | #endif | |
376 | #ifdef ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED | |
377 | { "ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED", ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED }, | |
378 | #endif | |
379 | #ifdef ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR | |
380 | { "ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR", ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR }, | |
381 | #endif | |
382 | #ifdef ONIGERR_INVALID_ARGUMENT | |
383 | { "ONIGERR_INVALID_ARGUMENT", ONIGERR_INVALID_ARGUMENT }, | |
384 | #endif | |
385 | #ifdef ONIGERR_END_PATTERN_AT_LEFT_BRACE | |
386 | { "ONIGERR_END_PATTERN_AT_LEFT_BRACE", ONIGERR_END_PATTERN_AT_LEFT_BRACE }, | |
387 | #endif | |
388 | #ifdef ONIGERR_END_PATTERN_AT_LEFT_BRACKET | |
389 | { "ONIGERR_END_PATTERN_AT_LEFT_BRACKET", ONIGERR_END_PATTERN_AT_LEFT_BRACKET }, | |
390 | #endif | |
391 | #ifdef ONIGERR_EMPTY_CHAR_CLASS | |
392 | { "ONIGERR_EMPTY_CHAR_CLASS", ONIGERR_EMPTY_CHAR_CLASS }, | |
393 | #endif | |
394 | #ifdef ONIGERR_PREMATURE_END_OF_CHAR_CLASS | |
395 | { "ONIGERR_PREMATURE_END_OF_CHAR_CLASS", ONIGERR_PREMATURE_END_OF_CHAR_CLASS }, | |
396 | #endif | |
397 | #ifdef ONIGERR_END_PATTERN_AT_ESCAPE | |
398 | { "ONIGERR_END_PATTERN_AT_ESCAPE", ONIGERR_END_PATTERN_AT_ESCAPE }, | |
399 | #endif | |
400 | #ifdef ONIGERR_END_PATTERN_AT_META | |
401 | { "ONIGERR_END_PATTERN_AT_META", ONIGERR_END_PATTERN_AT_META }, | |
402 | #endif | |
403 | #ifdef ONIGERR_END_PATTERN_AT_CONTROL | |
404 | { "ONIGERR_END_PATTERN_AT_CONTROL", ONIGERR_END_PATTERN_AT_CONTROL }, | |
405 | #endif | |
406 | #ifdef ONIGERR_META_CODE_SYNTAX | |
407 | { "ONIGERR_META_CODE_SYNTAX", ONIGERR_META_CODE_SYNTAX }, | |
408 | #endif | |
409 | #ifdef ONIGERR_CONTROL_CODE_SYNTAX | |
410 | { "ONIGERR_CONTROL_CODE_SYNTAX", ONIGERR_CONTROL_CODE_SYNTAX }, | |
411 | #endif | |
412 | #ifdef ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE | |
413 | { "ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE", ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE }, | |
414 | #endif | |
415 | #ifdef ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE | |
416 | { "ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE", ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE }, | |
417 | #endif | |
418 | #ifdef ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS | |
419 | { "ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS", ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS }, | |
420 | #endif | |
421 | #ifdef ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED | |
422 | { "ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED }, | |
423 | #endif | |
424 | #ifdef ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID | |
425 | { "ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID", ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID }, | |
426 | #endif | |
427 | #ifdef ONIGERR_NESTED_REPEAT_OPERATOR | |
428 | { "ONIGERR_NESTED_REPEAT_OPERATOR", ONIGERR_NESTED_REPEAT_OPERATOR }, | |
429 | #endif | |
430 | #ifdef ONIGERR_UNMATCHED_CLOSE_PARENTHESIS | |
431 | { "ONIGERR_UNMATCHED_CLOSE_PARENTHESIS", ONIGERR_UNMATCHED_CLOSE_PARENTHESIS }, | |
432 | #endif | |
433 | #ifdef ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS | |
434 | { "ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS", ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS }, | |
435 | #endif | |
436 | #ifdef ONIGERR_END_PATTERN_IN_GROUP | |
437 | { "ONIGERR_END_PATTERN_IN_GROUP", ONIGERR_END_PATTERN_IN_GROUP }, | |
438 | #endif | |
439 | #ifdef ONIGERR_UNDEFINED_GROUP_OPTION | |
440 | { "ONIGERR_UNDEFINED_GROUP_OPTION", ONIGERR_UNDEFINED_GROUP_OPTION }, | |
441 | #endif | |
442 | #ifdef ONIGERR_INVALID_POSIX_BRACKET_TYPE | |
443 | { "ONIGERR_INVALID_POSIX_BRACKET_TYPE", ONIGERR_INVALID_POSIX_BRACKET_TYPE }, | |
444 | #endif | |
445 | #ifdef ONIGERR_INVALID_LOOK_BEHIND_PATTERN | |
446 | { "ONIGERR_INVALID_LOOK_BEHIND_PATTERN", ONIGERR_INVALID_LOOK_BEHIND_PATTERN }, | |
447 | #endif | |
448 | #ifdef ONIGERR_INVALID_REPEAT_RANGE_PATTERN | |
449 | { "ONIGERR_INVALID_REPEAT_RANGE_PATTERN", ONIGERR_INVALID_REPEAT_RANGE_PATTERN }, | |
450 | #endif | |
451 | #ifdef ONIGERR_TOO_BIG_NUMBER | |
452 | { "ONIGERR_TOO_BIG_NUMBER", ONIGERR_TOO_BIG_NUMBER }, | |
453 | #endif | |
454 | #ifdef ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE | |
455 | { "ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE", ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE }, | |
456 | #endif | |
457 | #ifdef ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE | |
458 | { "ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE", ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE }, | |
459 | #endif | |
460 | #ifdef ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS | |
461 | { "ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS", ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS }, | |
462 | #endif | |
463 | #ifdef ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE | |
464 | { "ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE", ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE }, | |
465 | #endif | |
466 | #ifdef ONIGERR_TOO_MANY_MULTI_BYTE_RANGES | |
467 | { "ONIGERR_TOO_MANY_MULTI_BYTE_RANGES", ONIGERR_TOO_MANY_MULTI_BYTE_RANGES }, | |
468 | #endif | |
469 | #ifdef ONIGERR_TOO_SHORT_MULTI_BYTE_STRING | |
470 | { "ONIGERR_TOO_SHORT_MULTI_BYTE_STRING", ONIGERR_TOO_SHORT_MULTI_BYTE_STRING }, | |
471 | #endif | |
472 | #ifdef ONIGERR_TOO_BIG_BACKREF_NUMBER | |
473 | { "ONIGERR_TOO_BIG_BACKREF_NUMBER", ONIGERR_TOO_BIG_BACKREF_NUMBER }, | |
474 | #endif | |
475 | #ifdef ONIGERR_INVALID_BACKREF | |
476 | { "ONIGERR_INVALID_BACKREF", ONIGERR_INVALID_BACKREF }, | |
477 | #endif | |
478 | #ifdef ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED | |
479 | { "ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED", ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED }, | |
480 | #endif | |
481 | #ifdef ONIGERR_TOO_LONG_WIDE_CHAR_VALUE | |
482 | { "ONIGERR_TOO_LONG_WIDE_CHAR_VALUE", ONIGERR_TOO_LONG_WIDE_CHAR_VALUE }, | |
483 | #endif | |
484 | #ifdef ONIGERR_EMPTY_GROUP_NAME | |
485 | { "ONIGERR_EMPTY_GROUP_NAME", ONIGERR_EMPTY_GROUP_NAME }, | |
486 | #endif | |
487 | #ifdef ONIGERR_INVALID_GROUP_NAME | |
488 | { "ONIGERR_INVALID_GROUP_NAME", ONIGERR_INVALID_GROUP_NAME }, | |
489 | #endif | |
490 | #ifdef ONIGERR_INVALID_CHAR_IN_GROUP_NAME | |
491 | { "ONIGERR_INVALID_CHAR_IN_GROUP_NAME", ONIGERR_INVALID_CHAR_IN_GROUP_NAME }, | |
492 | #endif | |
493 | #ifdef ONIGERR_UNDEFINED_NAME_REFERENCE | |
494 | { "ONIGERR_UNDEFINED_NAME_REFERENCE", ONIGERR_UNDEFINED_NAME_REFERENCE }, | |
495 | #endif | |
496 | #ifdef ONIGERR_UNDEFINED_GROUP_REFERENCE | |
497 | { "ONIGERR_UNDEFINED_GROUP_REFERENCE", ONIGERR_UNDEFINED_GROUP_REFERENCE }, | |
498 | #endif | |
499 | #ifdef ONIGERR_MULTIPLEX_DEFINED_NAME | |
500 | { "ONIGERR_MULTIPLEX_DEFINED_NAME", ONIGERR_MULTIPLEX_DEFINED_NAME }, | |
501 | #endif | |
502 | #ifdef ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL | |
503 | { "ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL", ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL }, | |
504 | #endif | |
505 | #ifdef ONIGERR_NEVER_ENDING_RECURSION | |
506 | { "ONIGERR_NEVER_ENDING_RECURSION", ONIGERR_NEVER_ENDING_RECURSION }, | |
507 | #endif | |
508 | #ifdef ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY | |
509 | { "ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY", ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY }, | |
510 | #endif | |
511 | #ifdef ONIGERR_INVALID_CHAR_PROPERTY_NAME | |
512 | { "ONIGERR_INVALID_CHAR_PROPERTY_NAME", ONIGERR_INVALID_CHAR_PROPERTY_NAME }, | |
513 | #endif | |
514 | #ifdef ONIGERR_INVALID_CODE_POINT_VALUE | |
515 | { "ONIGERR_INVALID_CODE_POINT_VALUE", ONIGERR_INVALID_CODE_POINT_VALUE }, | |
516 | #endif | |
517 | #ifdef ONIGERR_INVALID_WIDE_CHAR_VALUE | |
518 | { "ONIGERR_INVALID_WIDE_CHAR_VALUE", ONIGERR_INVALID_WIDE_CHAR_VALUE }, | |
519 | #endif | |
520 | #ifdef ONIGERR_TOO_BIG_WIDE_CHAR_VALUE | |
521 | { "ONIGERR_TOO_BIG_WIDE_CHAR_VALUE", ONIGERR_TOO_BIG_WIDE_CHAR_VALUE }, | |
522 | #endif | |
523 | #ifdef ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION | |
524 | { "ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION", ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION }, | |
525 | #endif | |
526 | #ifdef ONIGERR_INVALID_COMBINATION_OF_OPTIONS | |
527 | { "ONIGERR_INVALID_COMBINATION_OF_OPTIONS", ONIGERR_INVALID_COMBINATION_OF_OPTIONS }, | |
528 | #endif | |
529 | #ifdef ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT | |
530 | { "ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT", ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT }, | |
531 | #endif | |
532 | { NULL, 0 } | |
533 | }; | |
534 | ||
535 | int LOnig_get_flags (lua_State *L) { | |
536 | const flag_pair* fps[] = { onig_flags, onig_error_flags, NULL }; | |
537 | return get_flags (L, fps); | |
538 | } | |
539 |
0 | /* lpcre.c - Lua binding of PCRE library */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #include <stdlib.h> | |
4 | #include <string.h> | |
5 | #include <locale.h> | |
6 | #include <ctype.h> | |
7 | #include <stdint.h> | |
8 | #include <pcre.h> | |
9 | ||
10 | #include "lua.h" | |
11 | #include "lauxlib.h" | |
12 | #include "../common.h" | |
13 | ||
14 | extern int Lpcre_get_flags (lua_State *L); | |
15 | extern int Lpcre_config (lua_State *L); | |
16 | extern flag_pair pcre_error_flags[]; | |
17 | ||
18 | /* These 2 settings may be redefined from the command-line or the makefile. | |
19 | * They should be kept in sync between themselves and with the target name. | |
20 | */ | |
21 | #ifndef REX_LIBNAME | |
22 | # define REX_LIBNAME "rex_pcre" | |
23 | #endif | |
24 | #ifndef REX_OPENLIB | |
25 | # define REX_OPENLIB luaopen_rex_pcre | |
26 | #endif | |
27 | ||
28 | #define REX_TYPENAME REX_LIBNAME"_regex" | |
29 | ||
30 | #define ALG_CFLAGS_DFLT 0 | |
31 | #define ALG_EFLAGS_DFLT 0 | |
32 | ||
33 | static int getcflags (lua_State *L, int pos); | |
34 | #define ALG_GETCFLAGS(L,pos) getcflags(L, pos) | |
35 | ||
36 | static void checkarg_compile (lua_State *L, int pos, TArgComp *argC); | |
37 | #define ALG_GETCARGS(a,b,c) checkarg_compile(a,b,c) | |
38 | ||
/* Classification of a pcre_exec() return code. */
#define ALG_NOMATCH(res)   ((res) == PCRE_ERROR_NOMATCH)
#define ALG_ISMATCH(res)   ((res) >= 0)

/* Accessors for the offset vector ud->match, which stores one
 * (begin, end) pair of ints per subpattern: pair n lives at indices 2n and
 * 2n+1.  Every macro argument is parenthesized so that callers may pass
 * arbitrary expressions (e.g. `i|1` or `&rec`) without precedence surprises.
 * Note that `n` is still evaluated more than once; do not pass expressions
 * with side effects.
 */
#define ALG_SUBBEG(ud,n)   ((ud)->match[(n)+(n)])
#define ALG_SUBEND(ud,n)   ((ud)->match[(n)+(n)+1])
#define ALG_SUBLEN(ud,n)   (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n))
#define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0)   /* negative begin: pair n is treated as unset */
#define ALG_NSUB(ud)       ((int)(ud)->ncapt)

/* Push subpattern n of `text` as a Lua string. */
#define ALG_PUSHSUB(L,ud,text,n) \
  lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n))

/* Push subpattern n, or boolean false when its begin offset is negative. */
#define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \
  (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))

/* Push the position(s) of subpattern n, shifted by `offs`.  The start is
 * converted to a 1-based index; the end offset is pushed as is.
 */
#define ALG_PUSHSTART(L,ud,offs,n)   lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1)
#define ALG_PUSHEND(L,ud,offs,n)     lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n))
#define ALG_PUSHOFFSETS(L,ud,offs,n) \
  (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n))
57 | ||
58 | #define ALG_BASE(st) 0 | |
59 | #define ALG_PULL | |
60 | ||
61 | typedef struct { | |
62 | pcre * pr; | |
63 | pcre_extra * extra; | |
64 | int * match; | |
65 | int ncapt; | |
66 | const unsigned char * tables; | |
67 | int freed; | |
68 | } TPcre; | |
69 | ||
70 | #define TUserdata TPcre | |
71 | ||
72 | #if PCRE_MAJOR >= 4 | |
73 | static void do_named_subpatterns (lua_State *L, TPcre *ud, const char *text); | |
74 | # define DO_NAMED_SUBPATTERNS do_named_subpatterns | |
75 | #endif | |
76 | ||
77 | #include "../algo.h" | |
78 | ||
79 | /* Locations of the 2 permanent tables in the function environment */ | |
80 | #define INDEX_CHARTABLES_META 1 /* chartables type's metatable */ | |
81 | #define INDEX_CHARTABLES_LINK 2 /* link chartables to compiled regex */ | |
82 | ||
83 | const char chartables_typename[] = "chartables"; | |
84 | ||
85 | /* Functions | |
86 | ****************************************************************************** | |
87 | */ | |
88 | ||
89 | static int getcflags (lua_State *L, int pos) { | |
90 | switch (lua_type (L, pos)) { | |
91 | case LUA_TNONE: | |
92 | case LUA_TNIL: | |
93 | return ALG_CFLAGS_DFLT; | |
94 | case LUA_TNUMBER: | |
95 | return lua_tointeger (L, pos); | |
96 | case LUA_TSTRING: { | |
97 | const char *s = lua_tostring (L, pos); | |
98 | int res = 0, ch; | |
99 | while ((ch = *s++) != '\0') { | |
100 | if (ch == 'i') res |= PCRE_CASELESS; | |
101 | else if (ch == 'm') res |= PCRE_MULTILINE; | |
102 | else if (ch == 's') res |= PCRE_DOTALL; | |
103 | else if (ch == 'x') res |= PCRE_EXTENDED; | |
104 | else if (ch == 'U') res |= PCRE_UNGREEDY; | |
105 | else if (ch == 'X') res |= PCRE_EXTRA; | |
106 | } | |
107 | return res; | |
108 | } | |
109 | default: | |
110 | return luaL_typerror (L, pos, "number or string"); | |
111 | } | |
112 | } | |
113 | ||
114 | static int generate_error (lua_State *L, const TPcre *ud, int errcode) { | |
115 | const char *key = get_flag_key (pcre_error_flags, errcode); | |
116 | (void) ud; | |
117 | if (key) | |
118 | return luaL_error (L, "error PCRE_%s", key); | |
119 | else | |
120 | return luaL_error (L, "PCRE error code %d", errcode); | |
121 | } | |
122 | ||
123 | #if PCRE_MAJOR >= 6 | |
124 | /* method r:dfa_exec (s, [st], [ef], [ovecsize], [wscount]) */ | |
125 | static void checkarg_dfa_exec (lua_State *L, TArgExec *argE, TPcre **ud) { | |
126 | *ud = check_ud (L); | |
127 | argE->text = luaL_checklstring (L, 2, &argE->textlen); | |
128 | argE->startoffset = get_startoffset (L, 3, argE->textlen); | |
129 | argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT); | |
130 | argE->ovecsize = (size_t)luaL_optinteger (L, 5, 100); | |
131 | argE->wscount = (size_t)luaL_optinteger (L, 6, 50); | |
132 | } | |
133 | #endif | |
134 | ||
135 | static void push_chartables_meta (lua_State *L) { | |
136 | lua_pushinteger (L, INDEX_CHARTABLES_META); | |
137 | lua_rawget (L, ALG_ENVIRONINDEX); | |
138 | } | |
139 | ||
140 | static int Lpcre_maketables (lua_State *L) { | |
141 | *(const void**)lua_newuserdata (L, sizeof(void*)) = pcre_maketables(); | |
142 | push_chartables_meta (L); | |
143 | lua_setmetatable (L, -2); | |
144 | return 1; | |
145 | } | |
146 | ||
147 | static void **check_chartables (lua_State *L, int pos) { | |
148 | void **q; | |
149 | /* Compare the metatable against the C function environment. */ | |
150 | if (lua_getmetatable(L, pos)) { | |
151 | push_chartables_meta (L); | |
152 | if (lua_rawequal(L, -1, -2) && | |
153 | (q = (void **)lua_touserdata(L, pos)) != NULL) { | |
154 | lua_pop(L, 2); | |
155 | return q; | |
156 | } | |
157 | } | |
158 | luaL_argerror(L, pos, lua_pushfstring (L, "not a %s", chartables_typename)); | |
159 | return NULL; | |
160 | } | |
161 | ||
162 | static int chartables_gc (lua_State *L) { | |
163 | void **ud = check_chartables (L, 1); | |
164 | if (*ud) { | |
165 | pcre_free (*ud); | |
166 | *ud = NULL; | |
167 | } | |
168 | return 0; | |
169 | } | |
170 | ||
171 | static int chartables_tostring (lua_State *L) { | |
172 | void **ud = check_chartables (L, 1); | |
173 | lua_pushfstring (L, "%s (%p)", chartables_typename, ud); | |
174 | return 1; | |
175 | } | |
176 | ||
177 | static void checkarg_compile (lua_State *L, int pos, TArgComp *argC) { | |
178 | argC->locale = NULL; | |
179 | argC->tables = NULL; | |
180 | if (!lua_isnoneornil (L, pos)) { | |
181 | if (lua_isstring (L, pos)) | |
182 | argC->locale = lua_tostring (L, pos); | |
183 | else { | |
184 | argC->tablespos = pos; | |
185 | argC->tables = (const unsigned char*) *check_chartables (L, pos); | |
186 | } | |
187 | } | |
188 | } | |
189 | ||
190 | static int compile_regex (lua_State *L, const TArgComp *argC, TPcre **pud) { | |
191 | const char *error; | |
192 | int erroffset; | |
193 | TPcre *ud; | |
194 | const unsigned char *tables = NULL; | |
195 | ||
196 | ud = (TPcre*)lua_newuserdata (L, sizeof (TPcre)); | |
197 | memset (ud, 0, sizeof (TPcre)); /* initialize all members to 0 */ | |
198 | lua_pushvalue (L, ALG_ENVIRONINDEX); | |
199 | lua_setmetatable (L, -2); | |
200 | ||
201 | if (argC->locale) { | |
202 | char old_locale[256]; | |
203 | strcpy (old_locale, setlocale (LC_CTYPE, NULL)); /* store the locale */ | |
204 | if (NULL == setlocale (LC_CTYPE, argC->locale)) /* set new locale */ | |
205 | return luaL_error (L, "cannot set locale"); | |
206 | ud->tables = tables = pcre_maketables (); /* make tables with new locale */ | |
207 | setlocale (LC_CTYPE, old_locale); /* restore the old locale */ | |
208 | } | |
209 | else if (argC->tables) { | |
210 | tables = argC->tables; | |
211 | lua_pushinteger (L, INDEX_CHARTABLES_LINK); | |
212 | lua_rawget (L, ALG_ENVIRONINDEX); | |
213 | lua_pushvalue (L, -2); | |
214 | lua_pushvalue (L, argC->tablespos); | |
215 | lua_rawset (L, -3); | |
216 | lua_pop (L, 1); | |
217 | } | |
218 | ||
219 | ud->pr = pcre_compile (argC->pattern, argC->cflags, &error, &erroffset, tables); | |
220 | if (!ud->pr) | |
221 | return luaL_error (L, "%s (pattern offset: %d)", error, erroffset + 1); | |
222 | ||
223 | ud->extra = pcre_study (ud->pr, 0, &error); | |
224 | if (error) return luaL_error (L, "%s", error); | |
225 | ||
226 | pcre_fullinfo (ud->pr, ud->extra, PCRE_INFO_CAPTURECOUNT, &ud->ncapt); | |
227 | /* need (2 ints per capture, plus one for substring match) * 3/2 */ | |
228 | ud->match = (int *) Lmalloc (L, (ALG_NSUB(ud) + 1) * 3 * sizeof (int)); | |
229 | if (!ud->match) | |
230 | luaL_error (L, "malloc failed"); | |
231 | ||
232 | if (pud) *pud = ud; | |
233 | return 1; | |
234 | } | |
235 | ||
236 | #if PCRE_MAJOR >= 4 | |
237 | /* the target table must be on lua stack top */ | |
238 | static void do_named_subpatterns (lua_State *L, TPcre *ud, const char *text) { | |
239 | int i, namecount, name_entry_size; | |
240 | unsigned char *name_table, *tabptr; | |
241 | ||
242 | /* do named subpatterns - NJG */ | |
243 | pcre_fullinfo (ud->pr, ud->extra, PCRE_INFO_NAMECOUNT, &namecount); | |
244 | if (namecount <= 0) | |
245 | return; | |
246 | pcre_fullinfo (ud->pr, ud->extra, PCRE_INFO_NAMETABLE, &name_table); | |
247 | pcre_fullinfo (ud->pr, ud->extra, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size); | |
248 | tabptr = name_table; | |
249 | for (i = 0; i < namecount; i++) { | |
250 | int n = (tabptr[0] << 8) | tabptr[1]; /* number of the capturing parenthesis */ | |
251 | if (n > 0 && n <= ALG_NSUB(ud)) { /* check range */ | |
252 | lua_pushstring (L, (char *)tabptr + 2); /* name of the capture, zero terminated */ | |
253 | ALG_PUSHSUB_OR_FALSE (L, ud, text, n); | |
254 | lua_rawset (L, -3); | |
255 | } | |
256 | tabptr += name_entry_size; | |
257 | } | |
258 | } | |
259 | #endif /* #if PCRE_MAJOR >= 4 */ | |
260 | ||
261 | #if PCRE_MAJOR >= 6 | |
262 | static int Lpcre_dfa_exec (lua_State *L) | |
263 | { | |
264 | TArgExec argE; | |
265 | TPcre *ud; | |
266 | int res; | |
267 | int *buf, *ovector, *wspace; | |
268 | size_t bufsize; | |
269 | ||
270 | checkarg_dfa_exec (L, &argE, &ud); | |
271 | bufsize = (argE.ovecsize + argE.wscount) * sizeof(int); | |
272 | buf = (int*) Lmalloc (L, bufsize); | |
273 | if (!buf) | |
274 | luaL_error (L, "malloc failed"); | |
275 | ovector = buf; | |
276 | wspace = buf + argE.ovecsize; | |
277 | ||
278 | res = pcre_dfa_exec (ud->pr, ud->extra, argE.text, (int)argE.textlen, | |
279 | argE.startoffset, argE.eflags, ovector, argE.ovecsize, wspace, argE.wscount); | |
280 | ||
281 | if (ALG_ISMATCH (res) || res == PCRE_ERROR_PARTIAL) { | |
282 | int i; | |
283 | int max = (res>0) ? res : (res==0) ? (int)argE.ovecsize/2 : 1; | |
284 | lua_pushinteger (L, ovector[0] + 1); /* 1-st return value */ | |
285 | lua_newtable (L); /* 2-nd return value */ | |
286 | for (i=0; i<max; i++) { | |
287 | lua_pushinteger (L, ovector[i+i+1]); | |
288 | lua_rawseti (L, -2, i+1); | |
289 | } | |
290 | lua_pushinteger (L, res); /* 3-rd return value */ | |
291 | Lfree (L, buf, bufsize); | |
292 | return 3; | |
293 | } | |
294 | else { | |
295 | Lfree (L, buf, bufsize); | |
296 | if (ALG_NOMATCH (res)) | |
297 | return lua_pushnil (L), 1; | |
298 | else | |
299 | return generate_error (L, ud, res); | |
300 | } | |
301 | } | |
302 | #endif /* #if PCRE_MAJOR >= 6 */ | |
303 | ||
304 | static int gmatch_exec (TUserdata *ud, TArgExec *argE) { | |
305 | return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, | |
306 | argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); | |
307 | } | |
308 | ||
309 | static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { | |
310 | lua_pushlstring (L, argE->text, argE->textlen); | |
311 | } | |
312 | ||
313 | static int findmatch_exec (TPcre *ud, TArgExec *argE) { | |
314 | return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, | |
315 | argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); | |
316 | } | |
317 | ||
318 | static int gsub_exec (TPcre *ud, TArgExec *argE, int st) { | |
319 | return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, | |
320 | st, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); | |
321 | } | |
322 | ||
323 | static int split_exec (TPcre *ud, TArgExec *argE, int offset) { | |
324 | return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, offset, | |
325 | argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3); | |
326 | } | |
327 | ||
328 | static int Lpcre_gc (lua_State *L) { | |
329 | TPcre *ud = check_ud (L); | |
330 | if (ud->freed == 0) { /* precaution against "manual" __gc calling */ | |
331 | ud->freed = 1; | |
332 | if (ud->pr) pcre_free (ud->pr); | |
333 | if (ud->extra) pcre_free (ud->extra); | |
334 | if (ud->tables) pcre_free ((void *)ud->tables); | |
335 | Lfree (L, ud->match, (ALG_NSUB(ud) + 1) * 3 * sizeof (int)); | |
336 | } | |
337 | return 0; | |
338 | } | |
339 | ||
340 | static int Lpcre_tostring (lua_State *L) { | |
341 | TPcre *ud = check_ud (L); | |
342 | if (ud->freed == 0) | |
343 | lua_pushfstring (L, "%s (%p)", REX_TYPENAME, (void*)ud); | |
344 | else | |
345 | lua_pushfstring (L, "%s (deleted)", REX_TYPENAME); | |
346 | return 1; | |
347 | } | |
348 | ||
349 | static int Lpcre_version (lua_State *L) { | |
350 | lua_pushstring (L, pcre_version ()); | |
351 | return 1; | |
352 | } | |
353 | ||
354 | #define SET_INFO_FIELD(L,ud,what,name,valtype) { \ | |
355 | valtype val; \ | |
356 | if (0 == pcre_fullinfo (ud->pr, ud->extra, what, &val)) { \ | |
357 | lua_pushnumber (L, val); \ | |
358 | lua_setfield (L, -2, name); \ | |
359 | } \ | |
360 | } | |
361 | ||
362 | static int Lpcre_fullinfo (lua_State *L) { | |
363 | TPcre *ud = check_ud (L); | |
364 | lua_newtable(L); | |
365 | ||
366 | SET_INFO_FIELD (L, ud, PCRE_INFO_BACKREFMAX, "BACKREFMAX", int) | |
367 | SET_INFO_FIELD (L, ud, PCRE_INFO_CAPTURECOUNT, "CAPTURECOUNT", int) | |
368 | SET_INFO_FIELD (L, ud, PCRE_INFO_FIRSTBYTE, "FIRSTBYTE", int) | |
369 | SET_INFO_FIELD (L, ud, PCRE_INFO_HASCRORLF, "HASCRORLF", int) | |
370 | SET_INFO_FIELD (L, ud, PCRE_INFO_JCHANGED, "JCHANGED", int) | |
371 | #ifdef PCRE_INFO_JIT | |
372 | SET_INFO_FIELD (L, ud, PCRE_INFO_JIT, "JIT", int) | |
373 | #endif | |
374 | #ifdef PCRE_INFO_JITSIZE | |
375 | SET_INFO_FIELD (L, ud, PCRE_INFO_JITSIZE, "JITSIZE", size_t); | |
376 | #endif | |
377 | #ifdef PCRE_INFO_MATCH_EMPTY | |
378 | SET_INFO_FIELD (L, ud, PCRE_INFO_MATCH_EMPTY, "MATCH_EMPTY", int) | |
379 | #endif | |
380 | #ifdef PCRE_INFO_MATCHLIMIT | |
381 | SET_INFO_FIELD (L, ud, PCRE_INFO_MATCHLIMIT, "MATCHLIMIT", uint32_t) | |
382 | #endif | |
383 | #ifdef PCRE_INFO_MAXLOOKBEHIND | |
384 | SET_INFO_FIELD (L, ud, PCRE_INFO_MAXLOOKBEHIND, "MAXLOOKBEHIND", int) /* int ? */ | |
385 | #endif | |
386 | #ifdef PCRE_INFO_MINLENGTH | |
387 | SET_INFO_FIELD (L, ud, PCRE_INFO_MINLENGTH, "MINLENGTH", int) | |
388 | #endif | |
389 | SET_INFO_FIELD (L, ud, PCRE_INFO_OKPARTIAL, "OKPARTIAL", int) | |
390 | SET_INFO_FIELD (L, ud, PCRE_INFO_OPTIONS, "OPTIONS", unsigned long) | |
391 | #ifdef PCRE_INFO_RECURSIONLIMIT | |
392 | SET_INFO_FIELD (L, ud, PCRE_INFO_RECURSIONLIMIT, "RECURSIONLIMIT", uint32_t) | |
393 | #endif | |
394 | SET_INFO_FIELD (L, ud, PCRE_INFO_SIZE, "SIZE", size_t) | |
395 | SET_INFO_FIELD (L, ud, PCRE_INFO_STUDYSIZE, "STUDYSIZE", size_t) | |
396 | #ifdef PCRE_INFO_FIRSTCHARACTERFLAGS | |
397 | SET_INFO_FIELD (L, ud, PCRE_INFO_FIRSTCHARACTERFLAGS, "FIRSTCHARACTERFLAGS", int) | |
398 | #endif | |
399 | #ifdef PCRE_INFO_FIRSTCHARACTER | |
400 | SET_INFO_FIELD (L, ud, PCRE_INFO_FIRSTCHARACTER, "FIRSTCHARACTER", uint32_t) | |
401 | #endif | |
402 | #ifdef PCRE_INFO_REQUIREDCHARFLAGS | |
403 | SET_INFO_FIELD (L, ud, PCRE_INFO_REQUIREDCHARFLAGS, "REQUIREDCHARFLAGS", int) | |
404 | #endif | |
405 | #ifdef PCRE_INFO_REQUIREDCHAR | |
406 | SET_INFO_FIELD (L, ud, PCRE_INFO_REQUIREDCHAR, "REQUIREDCHAR", uint32_t) | |
407 | #endif | |
408 | ||
409 | return 1; | |
410 | } | |
411 | ||
412 | static const luaL_Reg chartables_meta[] = { | |
413 | { "__gc", chartables_gc }, | |
414 | { "__tostring", chartables_tostring }, | |
415 | { NULL, NULL } | |
416 | }; | |
417 | ||
418 | static const luaL_Reg r_methods[] = { | |
419 | { "exec", algm_exec }, | |
420 | { "tfind", algm_tfind }, /* old name: match */ | |
421 | { "find", algm_find }, | |
422 | { "match", algm_match }, | |
423 | #if PCRE_MAJOR >= 6 | |
424 | { "dfa_exec", Lpcre_dfa_exec }, | |
425 | #endif | |
426 | { "fullinfo", Lpcre_fullinfo }, | |
427 | { "__gc", Lpcre_gc }, | |
428 | { "__tostring", Lpcre_tostring }, | |
429 | { NULL, NULL } | |
430 | }; | |
431 | ||
432 | static const luaL_Reg r_functions[] = { | |
433 | { "match", algf_match }, | |
434 | { "find", algf_find }, | |
435 | { "gmatch", algf_gmatch }, | |
436 | { "gsub", algf_gsub }, | |
437 | { "count", algf_count }, | |
438 | { "split", algf_split }, | |
439 | { "new", algf_new }, | |
440 | { "flags", Lpcre_get_flags }, | |
441 | { "version", Lpcre_version }, | |
442 | { "maketables", Lpcre_maketables }, | |
443 | #if PCRE_MAJOR >= 4 | |
444 | { "config", Lpcre_config }, | |
445 | #endif | |
446 | { NULL, NULL } | |
447 | }; | |
448 | ||
449 | /* Open the library */ | |
450 | REX_API int REX_OPENLIB (lua_State *L) { | |
451 | if (PCRE_MAJOR > atoi (pcre_version ())) { | |
452 | return luaL_error (L, "%s requires at least version %d of PCRE library", | |
453 | REX_LIBNAME, (int)PCRE_MAJOR); | |
454 | } | |
455 | ||
456 | alg_register(L, r_methods, r_functions, "PCRE"); | |
457 | ||
458 | /* create a table and register it as a metatable for "chartables" userdata */ | |
459 | lua_newtable (L); | |
460 | lua_pushliteral (L, "access denied"); | |
461 | lua_setfield (L, -2, "__metatable"); | |
462 | #if LUA_VERSION_NUM == 501 | |
463 | luaL_register (L, NULL, chartables_meta); | |
464 | lua_rawseti (L, LUA_ENVIRONINDEX, INDEX_CHARTABLES_META); | |
465 | #else | |
466 | lua_pushvalue(L, -3); | |
467 | luaL_setfuncs (L, chartables_meta, 1); | |
468 | lua_rawseti (L, -3, INDEX_CHARTABLES_META); | |
469 | #endif | |
470 | ||
471 | /* create a table for connecting "chartables" userdata to "regex" userdata */ | |
472 | lua_newtable (L); | |
473 | lua_pushliteral (L, "k"); /* weak keys */ | |
474 | lua_setfield (L, -2, "__mode"); | |
475 | lua_pushvalue (L, -1); /* setmetatable (tb, tb) */ | |
476 | lua_setmetatable (L, -2); | |
477 | #if LUA_VERSION_NUM == 501 | |
478 | lua_rawseti (L, LUA_ENVIRONINDEX, INDEX_CHARTABLES_LINK); | |
479 | #else | |
480 | lua_rawseti (L, -3, INDEX_CHARTABLES_LINK); | |
481 | #endif | |
482 | ||
483 | return 1; | |
484 | } |
0 | /* lpcre.c - PCRE regular expression library */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #include <pcre.h> | |
4 | #include "lua.h" | |
5 | #include "lauxlib.h" | |
6 | #include "../common.h" | |
7 | ||
8 | #define VERSION_PCRE (PCRE_MAJOR*100 + PCRE_MINOR) | |
9 | ||
10 | static flag_pair pcre_flags[] = { | |
11 | { "MAJOR", PCRE_MAJOR }, | |
12 | { "MINOR", PCRE_MINOR }, | |
13 | /*---------------------------------------------------------------------------*/ | |
14 | { "CASELESS", PCRE_CASELESS }, | |
15 | { "MULTILINE", PCRE_MULTILINE }, | |
16 | { "DOTALL", PCRE_DOTALL }, | |
17 | { "EXTENDED", PCRE_EXTENDED }, | |
18 | { "ANCHORED", PCRE_ANCHORED }, | |
19 | { "DOLLAR_ENDONLY", PCRE_DOLLAR_ENDONLY }, | |
20 | { "EXTRA", PCRE_EXTRA }, | |
21 | { "NOTBOL", PCRE_NOTBOL }, | |
22 | { "NOTEOL", PCRE_NOTEOL }, | |
23 | { "UNGREEDY", PCRE_UNGREEDY }, | |
24 | { "NOTEMPTY", PCRE_NOTEMPTY }, | |
25 | { "UTF8", PCRE_UTF8 }, | |
26 | #if VERSION_PCRE >= 400 | |
27 | { "NO_AUTO_CAPTURE", PCRE_NO_AUTO_CAPTURE }, | |
28 | { "NO_UTF8_CHECK", PCRE_NO_UTF8_CHECK }, | |
29 | #endif | |
30 | #if VERSION_PCRE >= 500 | |
31 | { "AUTO_CALLOUT", PCRE_AUTO_CALLOUT }, | |
32 | { "PARTIAL", PCRE_PARTIAL }, | |
33 | #endif | |
34 | #ifdef PCRE_PARTIAL_SOFT | |
35 | { "PARTIAL_SOFT", PCRE_PARTIAL_SOFT }, | |
36 | #endif | |
37 | #if VERSION_PCRE >= 600 | |
38 | { "DFA_SHORTEST", PCRE_DFA_SHORTEST }, | |
39 | { "DFA_RESTART", PCRE_DFA_RESTART }, | |
40 | { "FIRSTLINE", PCRE_FIRSTLINE }, | |
41 | #endif | |
42 | #if VERSION_PCRE >= 607 | |
43 | { "DUPNAMES", PCRE_DUPNAMES }, | |
44 | { "NEWLINE_CR", PCRE_NEWLINE_CR }, | |
45 | { "NEWLINE_LF", PCRE_NEWLINE_LF }, | |
46 | { "NEWLINE_CRLF", PCRE_NEWLINE_CRLF }, | |
47 | #endif | |
48 | #if VERSION_PCRE >= 700 | |
49 | { "NEWLINE_ANY", PCRE_NEWLINE_ANY }, | |
50 | #endif | |
51 | #if VERSION_PCRE >= 701 | |
52 | { "NEWLINE_ANYCRLF", PCRE_NEWLINE_ANYCRLF }, | |
53 | #endif | |
54 | #if VERSION_PCRE >= 704 | |
55 | { "BSR_ANYCRLF", PCRE_BSR_ANYCRLF }, | |
56 | { "BSR_UNICODE", PCRE_BSR_UNICODE }, | |
57 | #endif | |
58 | #if VERSION_PCRE >= 707 | |
59 | { "JAVASCRIPT_COMPAT", PCRE_JAVASCRIPT_COMPAT }, | |
60 | #endif | |
61 | #ifdef PCRE_NO_START_OPTIMIZE | |
62 | { "NO_START_OPTIMIZE", PCRE_NO_START_OPTIMIZE }, | |
63 | #endif | |
64 | #ifdef PCRE_NO_START_OPTIMISE | |
65 | { "NO_START_OPTIMISE", PCRE_NO_START_OPTIMISE }, | |
66 | #endif | |
67 | #ifdef PCRE_PARTIAL_HARD | |
68 | { "PARTIAL_HARD", PCRE_PARTIAL_HARD }, | |
69 | #endif | |
70 | #ifdef PCRE_NOTEMPTY_ATSTART | |
71 | { "NOTEMPTY_ATSTART", PCRE_NOTEMPTY_ATSTART }, | |
72 | #endif | |
73 | #ifdef PCRE_UCP | |
74 | { "UCP", PCRE_UCP }, | |
75 | #endif | |
76 | /*---------------------------------------------------------------------------*/ | |
77 | { "INFO_OPTIONS", PCRE_INFO_OPTIONS }, | |
78 | { "INFO_SIZE", PCRE_INFO_SIZE }, | |
79 | { "INFO_CAPTURECOUNT", PCRE_INFO_CAPTURECOUNT }, | |
80 | { "INFO_BACKREFMAX", PCRE_INFO_BACKREFMAX }, | |
81 | #if VERSION_PCRE >= 400 | |
82 | { "INFO_FIRSTBYTE", PCRE_INFO_FIRSTBYTE }, | |
83 | #endif | |
84 | { "INFO_FIRSTCHAR", PCRE_INFO_FIRSTCHAR }, | |
85 | { "INFO_FIRSTTABLE", PCRE_INFO_FIRSTTABLE }, | |
86 | { "INFO_LASTLITERAL", PCRE_INFO_LASTLITERAL }, | |
87 | #if VERSION_PCRE >= 400 | |
88 | { "INFO_NAMEENTRYSIZE", PCRE_INFO_NAMEENTRYSIZE }, | |
89 | { "INFO_NAMECOUNT", PCRE_INFO_NAMECOUNT }, | |
90 | { "INFO_NAMETABLE", PCRE_INFO_NAMETABLE }, | |
91 | { "INFO_STUDYSIZE", PCRE_INFO_STUDYSIZE }, | |
92 | #endif | |
93 | #if VERSION_PCRE >= 500 | |
94 | { "INFO_DEFAULT_TABLES", PCRE_INFO_DEFAULT_TABLES }, | |
95 | #endif | |
96 | #ifdef PCRE_INFO_OKPARTIAL | |
97 | { "INFO_OKPARTIAL", PCRE_INFO_OKPARTIAL }, | |
98 | #endif | |
99 | #ifdef PCRE_INFO_JCHANGED | |
100 | { "INFO_JCHANGED", PCRE_INFO_JCHANGED }, | |
101 | #endif | |
102 | #ifdef PCRE_INFO_HASCRORLF | |
103 | { "INFO_HASCRORLF", PCRE_INFO_HASCRORLF }, | |
104 | #endif | |
105 | #ifdef PCRE_INFO_MINLENGTH | |
106 | { "INFO_MINLENGTH", PCRE_INFO_MINLENGTH }, | |
107 | #endif | |
108 | #ifdef PCRE_INFO_JIT | |
109 | { "INFO_JIT", PCRE_INFO_JIT }, | |
110 | #endif | |
111 | #ifdef PCRE_INFO_JITSIZE | |
112 | { "INFO_JITSIZE", PCRE_INFO_JITSIZE }, | |
113 | #endif | |
114 | /*---------------------------------------------------------------------------*/ | |
115 | #if VERSION_PCRE >= 400 | |
116 | { "EXTRA_STUDY_DATA", PCRE_EXTRA_STUDY_DATA }, | |
117 | { "EXTRA_MATCH_LIMIT", PCRE_EXTRA_MATCH_LIMIT }, | |
118 | { "EXTRA_CALLOUT_DATA", PCRE_EXTRA_CALLOUT_DATA }, | |
119 | #endif | |
120 | #if VERSION_PCRE >= 500 | |
121 | { "EXTRA_TABLES", PCRE_EXTRA_TABLES }, | |
122 | #endif | |
123 | #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION | |
124 | { "EXTRA_MATCH_LIMIT_RECURSION", PCRE_EXTRA_MATCH_LIMIT_RECURSION }, | |
125 | #endif | |
126 | #ifdef PCRE_EXTRA_MARK | |
127 | { "EXTRA_MARK", PCRE_EXTRA_MARK }, | |
128 | #endif | |
129 | /*---------------------------------------------------------------------------*/ | |
130 | { NULL, 0 } | |
131 | }; | |
132 | ||
133 | flag_pair pcre_error_flags[] = { | |
134 | { "ERROR_NOMATCH", PCRE_ERROR_NOMATCH }, | |
135 | { "ERROR_NULL", PCRE_ERROR_NULL }, | |
136 | { "ERROR_BADOPTION", PCRE_ERROR_BADOPTION }, | |
137 | { "ERROR_BADMAGIC", PCRE_ERROR_BADMAGIC }, | |
138 | #if VERSION_PCRE >= 700 | |
139 | { "ERROR_UNKNOWN_OPCODE", PCRE_ERROR_UNKNOWN_OPCODE }, | |
140 | #endif | |
141 | { "ERROR_UNKNOWN_NODE", PCRE_ERROR_UNKNOWN_NODE }, | |
142 | { "ERROR_NOMEMORY", PCRE_ERROR_NOMEMORY }, | |
143 | { "ERROR_NOSUBSTRING", PCRE_ERROR_NOSUBSTRING }, | |
144 | #if VERSION_PCRE >= 400 | |
145 | { "ERROR_MATCHLIMIT", PCRE_ERROR_MATCHLIMIT }, | |
146 | { "ERROR_CALLOUT", PCRE_ERROR_CALLOUT }, | |
147 | { "ERROR_BADUTF8", PCRE_ERROR_BADUTF8 }, | |
148 | { "ERROR_BADUTF8_OFFSET", PCRE_ERROR_BADUTF8_OFFSET }, | |
149 | #endif | |
150 | #if VERSION_PCRE >= 500 | |
151 | { "ERROR_PARTIAL", PCRE_ERROR_PARTIAL }, | |
152 | { "ERROR_BADPARTIAL", PCRE_ERROR_BADPARTIAL }, | |
153 | { "ERROR_INTERNAL", PCRE_ERROR_INTERNAL }, | |
154 | { "ERROR_BADCOUNT", PCRE_ERROR_BADCOUNT }, | |
155 | #endif | |
156 | #if VERSION_PCRE >= 600 | |
157 | { "ERROR_DFA_UITEM", PCRE_ERROR_DFA_UITEM }, | |
158 | { "ERROR_DFA_UCOND", PCRE_ERROR_DFA_UCOND }, | |
159 | { "ERROR_DFA_UMLIMIT", PCRE_ERROR_DFA_UMLIMIT }, | |
160 | { "ERROR_DFA_WSSIZE", PCRE_ERROR_DFA_WSSIZE }, | |
161 | { "ERROR_DFA_RECURSE", PCRE_ERROR_DFA_RECURSE }, | |
162 | #endif | |
163 | #if VERSION_PCRE >= 607 | |
164 | { "ERROR_RECURSIONLIMIT", PCRE_ERROR_RECURSIONLIMIT }, | |
165 | #endif | |
166 | #if VERSION_PCRE >= 700 | |
167 | { "ERROR_BADNEWLINE", PCRE_ERROR_BADNEWLINE }, | |
168 | #endif | |
169 | #ifdef PCRE_ERROR_NULLWSLIMIT | |
170 | { "ERROR_NULLWSLIMIT", PCRE_ERROR_NULLWSLIMIT }, | |
171 | #endif | |
172 | #ifdef PCRE_ERROR_BADOFFSET | |
173 | { "ERROR_BADOFFSET", PCRE_ERROR_BADOFFSET }, | |
174 | #endif | |
175 | #ifdef PCRE_ERROR_SHORTUTF8 | |
176 | { "ERROR_SHORTUTF8", PCRE_ERROR_SHORTUTF8 }, | |
177 | #endif | |
178 | #ifdef PCRE_ERROR_RECURSELOOP | |
179 | { "ERROR_RECURSELOOP", PCRE_ERROR_RECURSELOOP }, | |
180 | #endif | |
181 | /*---------------------------------------------------------------------------*/ | |
182 | { NULL, 0 } | |
183 | }; | |
184 | ||
#if VERSION_PCRE >= 400
/* Request codes accepted by pcre_config(), keyed by the name under which the
 * result is stored in the Lua table.  Terminated by { NULL, 0 }. */
static const flag_pair pcre_config_flags[] = {
  { "CONFIG_UTF8",                   PCRE_CONFIG_UTF8 },
  { "CONFIG_NEWLINE",                PCRE_CONFIG_NEWLINE },
  { "CONFIG_LINK_SIZE",              PCRE_CONFIG_LINK_SIZE },
  { "CONFIG_POSIX_MALLOC_THRESHOLD", PCRE_CONFIG_POSIX_MALLOC_THRESHOLD },
  { "CONFIG_MATCH_LIMIT",            PCRE_CONFIG_MATCH_LIMIT },
  { "CONFIG_STACKRECURSE",           PCRE_CONFIG_STACKRECURSE },
#if VERSION_PCRE >= 500
  { "CONFIG_UNICODE_PROPERTIES",     PCRE_CONFIG_UNICODE_PROPERTIES },
#endif
#if VERSION_PCRE >= 650
  { "CONFIG_MATCH_LIMIT_RECURSION",  PCRE_CONFIG_MATCH_LIMIT_RECURSION },
#endif
#if VERSION_PCRE >= 704
  { "CONFIG_BSR",                    PCRE_CONFIG_BSR },
#endif
/*---------------------------------------------------------------------------*/
  { NULL, 0 }
};

/* Returns non-zero for the pcre_config() requests whose result the PCRE
 * documentation describes as a long integer rather than an int. */
static int config_result_is_long (int what) {
  switch (what) {
    case PCRE_CONFIG_MATCH_LIMIT:
#ifdef PCRE_CONFIG_MATCH_LIMIT_RECURSION
    case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
#endif
      return 1;
    default:
      return 0;
  }
}

/* function config ([tb])
 *
 * Fills a table (argument 1 if it is a table, otherwise a new one) with the
 * values pcre_config() reports for every entry of pcre_config_flags; entries
 * for which pcre_config() fails are skipped.  Returns the table.
 */
int Lpcre_config (lua_State *L) {
  /* pcre_config() writes either an int or a long integer through its output
   * pointer, so the receiving buffer must be large enough for both; a plain
   * int would be overrun where long is wider than int. */
  union { int i; unsigned long ul; } out;
  const flag_pair *fp;

  if (lua_istable (L, 1))
    lua_settop (L, 1);
  else
    lua_newtable (L);

  for (fp = pcre_config_flags; fp->key; ++fp) {
    out.ul = 0;
    if (0 == pcre_config (fp->val, &out)) {
      /* NOTE(review): assumes the long-integer width documented for current
       * PCRE 8.x; very old releases may have used unsigned int -- confirm if
       * those still need to be supported on big-endian LP64 targets. */
      if (config_result_is_long (fp->val))
        lua_pushinteger (L, (lua_Integer) out.ul);
      else
        lua_pushinteger (L, out.i);
      lua_setfield (L, -2, fp->key);
    }
  }
  return 1;
}
#endif /* #if VERSION_PCRE >= 400 */
222 | ||
223 | int Lpcre_get_flags (lua_State *L) { | |
224 | const flag_pair* fps[] = { pcre_flags, pcre_error_flags, NULL }; | |
225 | return get_flags (L, fps); | |
226 | } | |
227 |
0 | /* lpcre2.c - Lua binding of PCRE2 library */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #include <stdlib.h> | |
4 | #include <string.h> | |
5 | #include <locale.h> | |
6 | #include <ctype.h> | |
7 | #include <stdint.h> | |
8 | #include <pcre2.h> | |
9 | ||
10 | #include "lua.h" | |
11 | #include "lauxlib.h" | |
12 | #include "../common.h" | |
13 | ||
14 | extern int Lpcre2_get_flags (lua_State *L); | |
15 | extern int Lpcre2_config (lua_State *L); | |
16 | extern flag_pair pcre2_error_flags[]; | |
17 | ||
18 | /* These 2 settings may be redefined from the command-line or the makefile. | |
19 | * They should be kept in sync between themselves and with the target name. | |
20 | */ | |
21 | #ifndef REX_LIBNAME | |
22 | # define REX_LIBNAME "rex_pcre2" | |
23 | #endif | |
24 | #ifndef REX_OPENLIB | |
25 | # define REX_OPENLIB luaopen_rex_pcre2 | |
26 | #endif | |
27 | ||
28 | #define REX_TYPENAME REX_LIBNAME"_regex" | |
29 | ||
30 | #define ALG_CFLAGS_DFLT 0 | |
31 | #define ALG_EFLAGS_DFLT 0 | |
32 | ||
33 | static int getcflags (lua_State *L, int pos); | |
34 | #define ALG_GETCFLAGS(L,pos) getcflags(L, pos) | |
35 | ||
36 | static void checkarg_compile (lua_State *L, int pos, TArgComp *argC); | |
37 | #define ALG_GETCARGS(a,b,c) checkarg_compile(a,b,c) | |
38 | ||
39 | #define ALG_NOMATCH(res) ((res) == PCRE2_ERROR_NOMATCH) | |
40 | #define ALG_ISMATCH(res) ((res) >= 0) | |
41 | #define ALG_SUBBEG(ud,n) ((int)(ud)->ovector[(n)+(n)]) | |
42 | #define ALG_SUBEND(ud,n) ((int)(ud)->ovector[(n)+(n)+1]) | |
43 | #define ALG_SUBLEN(ud,n) (ALG_SUBEND((ud),(n)) - ALG_SUBBEG((ud),(n))) | |
44 | #define ALG_SUBVALID(ud,n) (0 == pcre2_substring_length_bynumber((ud)->match_data, (n), NULL)) | |
45 | #define ALG_NSUB(ud) ((int)(ud)->ncapt) | |
46 | ||
47 | #define ALG_PUSHSUB(L,ud,text,n) \ | |
48 | lua_pushlstring (L, (text) + ALG_SUBBEG((ud),(n)), ALG_SUBLEN((ud),(n))) | |
49 | ||
50 | #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ | |
51 | (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
52 | ||
53 | #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1) | |
54 | #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n)) | |
55 | #define ALG_PUSHOFFSETS(L,ud,offs,n) \ | |
56 | (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n)) | |
57 | ||
58 | #define ALG_BASE(st) 0 | |
59 | #define ALG_PULL | |
60 | ||
61 | typedef struct { | |
62 | pcre2_code *pr; | |
63 | pcre2_compile_context *ccontext; | |
64 | pcre2_match_data *match_data; | |
65 | PCRE2_SIZE *ovector; | |
66 | int ncapt; | |
67 | const unsigned char *tables; | |
68 | int freed; | |
69 | } TPcre2; | |
70 | ||
71 | #define TUserdata TPcre2 | |
72 | ||
73 | static void do_named_subpatterns (lua_State *L, TPcre2 *ud, const char *text); | |
74 | # define DO_NAMED_SUBPATTERNS do_named_subpatterns | |
75 | ||
76 | #include "../algo.h" | |
77 | ||
78 | /* Locations of the 2 permanent tables in the function environment */ | |
79 | #define INDEX_CHARTABLES_META 1 /* chartables type's metatable */ | |
80 | #define INDEX_CHARTABLES_LINK 2 /* link chartables to compiled regex */ | |
81 | ||
82 | const char chartables_typename[] = "chartables"; | |
83 | ||
84 | /* Functions | |
85 | ****************************************************************************** | |
86 | */ | |
87 | ||
88 | static int push_error_message (lua_State *L, int errorcode) //### is this function needed? | |
89 | { | |
90 | PCRE2_UCHAR buf[256]; | |
91 | if (pcre2_get_error_message(errorcode, buf, 256) > 0) | |
92 | { | |
93 | lua_pushstring(L, (const char*)buf); | |
94 | return 1; | |
95 | } | |
96 | return 0; | |
97 | } | |
98 | ||
99 | static int getcflags (lua_State *L, int pos) { | |
100 | switch (lua_type (L, pos)) { | |
101 | case LUA_TNONE: | |
102 | case LUA_TNIL: | |
103 | return ALG_CFLAGS_DFLT; | |
104 | case LUA_TNUMBER: | |
105 | return lua_tointeger (L, pos); | |
106 | case LUA_TSTRING: { | |
107 | const char *s = lua_tostring (L, pos); | |
108 | int res = 0, ch; | |
109 | while ((ch = *s++) != '\0') { | |
110 | if (ch == 'i') res |= PCRE2_CASELESS; | |
111 | else if (ch == 'm') res |= PCRE2_MULTILINE; | |
112 | else if (ch == 's') res |= PCRE2_DOTALL; | |
113 | else if (ch == 'x') res |= PCRE2_EXTENDED; | |
114 | else if (ch == 'U') res |= PCRE2_UNGREEDY; | |
115 | //else if (ch == 'X') res |= PCRE2_EXTRA; //### does not exist in PCRE2 -> reflect in manual | |
116 | } | |
117 | return res; | |
118 | } | |
119 | default: | |
120 | return luaL_typerror (L, pos, "number or string"); | |
121 | } | |
122 | } | |
123 | ||
124 | static int generate_error (lua_State *L, const TPcre2 *ud, int errcode) { | |
125 | const char *key = get_flag_key (pcre2_error_flags, errcode); | |
126 | (void) ud; | |
127 | if (key) | |
128 | return luaL_error (L, "error PCRE2_%s", key); | |
129 | else | |
130 | return luaL_error (L, "PCRE2 error code %d", errcode); | |
131 | } | |
132 | ||
133 | /* method r:dfa_exec (s, [st], [ef], [ovecsize], [wscount]) */ | |
134 | static void checkarg_dfa_exec (lua_State *L, TArgExec *argE, TPcre2 **ud) { | |
135 | *ud = check_ud (L); | |
136 | argE->text = luaL_checklstring (L, 2, &argE->textlen); | |
137 | argE->startoffset = get_startoffset (L, 3, argE->textlen); | |
138 | argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT); | |
139 | argE->ovecsize = (size_t)luaL_optinteger (L, 5, 100); | |
140 | argE->wscount = (size_t)luaL_optinteger (L, 6, 50); | |
141 | } | |
142 | ||
143 | static void push_chartables_meta (lua_State *L) { | |
144 | lua_pushinteger (L, INDEX_CHARTABLES_META); | |
145 | lua_rawget (L, ALG_ENVIRONINDEX); | |
146 | } | |
147 | ||
148 | static int Lpcre2_maketables (lua_State *L) { | |
149 | *(const void**)lua_newuserdata (L, sizeof(void*)) = pcre2_maketables(NULL); //### argument NULL | |
150 | push_chartables_meta (L); | |
151 | lua_setmetatable (L, -2); | |
152 | return 1; | |
153 | } | |
154 | ||
155 | static void **check_chartables (lua_State *L, int pos) { | |
156 | void **q; | |
157 | /* Compare the metatable against the C function environment. */ | |
158 | if (lua_getmetatable(L, pos)) { | |
159 | push_chartables_meta (L); | |
160 | if (lua_rawequal(L, -1, -2) && | |
161 | (q = (void **)lua_touserdata(L, pos)) != NULL) { | |
162 | lua_pop(L, 2); | |
163 | return q; | |
164 | } | |
165 | } | |
166 | luaL_argerror(L, pos, lua_pushfstring (L, "not a %s", chartables_typename)); | |
167 | return NULL; | |
168 | } | |
169 | ||
170 | static int chartables_gc (lua_State *L) { | |
171 | void **ud = check_chartables (L, 1); | |
172 | if (*ud) { | |
173 | free (*ud); //### free() should be called only if pcre2_maketables was called with NULL argument | |
174 | *ud = NULL; | |
175 | } | |
176 | return 0; | |
177 | } | |
178 | ||
179 | static int chartables_tostring (lua_State *L) { | |
180 | void **ud = check_chartables (L, 1); | |
181 | lua_pushfstring (L, "%s (%p)", chartables_typename, ud); | |
182 | return 1; | |
183 | } | |
184 | ||
185 | static void checkarg_compile (lua_State *L, int pos, TArgComp *argC) { | |
186 | argC->locale = NULL; | |
187 | argC->tables = NULL; | |
188 | if (!lua_isnoneornil (L, pos)) { | |
189 | if (lua_isstring (L, pos)) | |
190 | argC->locale = lua_tostring (L, pos); | |
191 | else { | |
192 | argC->tablespos = pos; | |
193 | argC->tables = (const unsigned char*) *check_chartables (L, pos); | |
194 | } | |
195 | } | |
196 | } | |
197 | ||
198 | static int compile_regex (lua_State *L, const TArgComp *argC, TPcre2 **pud) { | |
199 | int errcode; | |
200 | PCRE2_SIZE erroffset; | |
201 | TPcre2 *ud; | |
202 | ||
203 | ud = (TPcre2*)lua_newuserdata (L, sizeof (TPcre2)); | |
204 | memset (ud, 0, sizeof (TPcre2)); /* initialize all members to 0 */ | |
205 | lua_pushvalue (L, ALG_ENVIRONINDEX); | |
206 | lua_setmetatable (L, -2); | |
207 | ||
208 | ud->ccontext = pcre2_compile_context_create(NULL); | |
209 | if (ud->ccontext == NULL) | |
210 | return luaL_error (L, "malloc failed"); | |
211 | ||
212 | if (argC->locale) { | |
213 | char old_locale[256]; | |
214 | strcpy (old_locale, setlocale (LC_CTYPE, NULL)); /* store the locale */ | |
215 | if (NULL == setlocale (LC_CTYPE, argC->locale)) /* set new locale */ | |
216 | return luaL_error (L, "cannot set locale"); | |
217 | ud->tables = pcre2_maketables (NULL); /* make tables with new locale */ //### argument NULL | |
218 | pcre2_set_character_tables(ud->ccontext, ud->tables); | |
219 | setlocale (LC_CTYPE, old_locale); /* restore the old locale */ | |
220 | } | |
221 | else if (argC->tables) { | |
222 | pcre2_set_character_tables(ud->ccontext, argC->tables); | |
223 | lua_pushinteger (L, INDEX_CHARTABLES_LINK); | |
224 | lua_rawget (L, ALG_ENVIRONINDEX); | |
225 | lua_pushvalue (L, -2); | |
226 | lua_pushvalue (L, argC->tablespos); | |
227 | lua_rawset (L, -3); | |
228 | lua_pop (L, 1); | |
229 | } | |
230 | ||
231 | ud->pr = pcre2_compile ((PCRE2_SPTR)argC->pattern, argC->patlen, argC->cflags, &errcode, | |
232 | &erroffset, ud->ccontext); //### DOUBLE-CHECK ALL ARGUMENTS | |
233 | if (!ud->pr) { | |
234 | if (push_error_message(L, errcode)) | |
235 | return luaL_error (L, "%s (pattern offset: %d)", lua_tostring(L,-1), erroffset + 1); | |
236 | else | |
237 | return luaL_error (L, "%s (pattern offset: %d)", "pattern compile error", erroffset + 1); | |
238 | } | |
239 | ||
240 | if (0 != pcre2_pattern_info (ud->pr, PCRE2_INFO_CAPTURECOUNT, &ud->ncapt)) //### | |
241 | return luaL_error (L, "could not get pattern info"); | |
242 | ||
243 | /* need (2 ints per capture, plus one for substring match) * 3/2 */ | |
244 | ud->match_data = pcre2_match_data_create(ud->ncapt+1, NULL); //### CHECK ALL | |
245 | if (!ud->match_data) | |
246 | return luaL_error (L, "malloc failed"); | |
247 | ||
248 | ud->ovector = pcre2_get_ovector_pointer(ud->match_data); | |
249 | ||
250 | if (pud) *pud = ud; | |
251 | return 1; | |
252 | } | |
253 | ||
254 | /* the target table must be on lua stack top */ | |
255 | static void do_named_subpatterns (lua_State *L, TPcre2 *ud, const char *text) { | |
256 | int i, namecount, name_entry_size; | |
257 | unsigned char *name_table; | |
258 | PCRE2_SPTR tabptr; | |
259 | ||
260 | /* do named subpatterns - NJG */ | |
261 | pcre2_pattern_info (ud->pr, PCRE2_INFO_NAMECOUNT, &namecount); | |
262 | if (namecount <= 0) | |
263 | return; | |
264 | pcre2_pattern_info (ud->pr, PCRE2_INFO_NAMETABLE, &name_table); | |
265 | pcre2_pattern_info (ud->pr, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size); | |
266 | tabptr = name_table; | |
267 | for (i = 0; i < namecount; i++) { | |
268 | int n = (tabptr[0] << 8) | tabptr[1]; /* number of the capturing parenthesis */ | |
269 | if (n > 0 && n <= ALG_NSUB(ud)) { /* check range */ | |
270 | lua_pushstring (L, (char *)tabptr + 2); /* name of the capture, zero terminated */ | |
271 | ALG_PUSHSUB_OR_FALSE (L, ud, text, n); | |
272 | lua_rawset (L, -3); | |
273 | } | |
274 | tabptr += name_entry_size; | |
275 | } | |
276 | } | |
277 | ||
278 | static int Lpcre2_dfa_exec (lua_State *L) | |
279 | { | |
280 | TArgExec argE; | |
281 | TPcre2 *ud; | |
282 | int res; | |
283 | int *wspace; | |
284 | size_t wsize; | |
285 | ||
286 | checkarg_dfa_exec (L, &argE, &ud); | |
287 | wsize = argE.wscount * sizeof(int); | |
288 | wspace = (int*) Lmalloc (L, wsize); | |
289 | if (!wspace) | |
290 | luaL_error (L, "malloc failed"); | |
291 | ||
292 | ud->match_data = pcre2_match_data_create(argE.ovecsize/2, NULL); //### CHECK ALL | |
293 | if (!ud->match_data) | |
294 | return luaL_error (L, "malloc failed"); | |
295 | ||
296 | res = pcre2_dfa_match (ud->pr, (PCRE2_SPTR)argE.text, argE.textlen, argE.startoffset, | |
297 | argE.eflags, ud->match_data, NULL, wspace, argE.wscount); //### CHECK ALL | |
298 | ||
299 | if (ALG_ISMATCH (res) || res == PCRE2_ERROR_PARTIAL) { | |
300 | int i; | |
301 | int max = (res>0) ? res : (res==0) ? (int)argE.ovecsize/2 : 1; | |
302 | PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(ud->match_data); | |
303 | ||
304 | lua_pushinteger (L, ovector[0] + 1); /* 1-st return value */ | |
305 | lua_newtable (L); /* 2-nd return value */ | |
306 | for (i=0; i<max; i++) { | |
307 | lua_pushinteger (L, ovector[i+i+1]); | |
308 | lua_rawseti (L, -2, i+1); | |
309 | } | |
310 | lua_pushinteger (L, res); /* 3-rd return value */ | |
311 | Lfree (L, wspace, wsize); | |
312 | return 3; | |
313 | } | |
314 | else { | |
315 | Lfree (L, wspace, wsize); | |
316 | if (ALG_NOMATCH (res)) | |
317 | return lua_pushnil (L), 1; | |
318 | else | |
319 | return generate_error (L, ud, res); | |
320 | } | |
321 | } | |
322 | ||
323 | static int gmatch_exec (TUserdata *ud, TArgExec *argE) { | |
324 | return pcre2_match (ud->pr, (PCRE2_SPTR)argE->text, argE->textlen, | |
325 | argE->startoffset, argE->eflags, ud->match_data, NULL); //### | |
326 | } | |
327 | ||
328 | static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { | |
329 | lua_pushlstring (L, argE->text, argE->textlen); | |
330 | } | |
331 | ||
332 | static int findmatch_exec (TPcre2 *ud, TArgExec *argE) { | |
333 | return pcre2_match (ud->pr, (PCRE2_SPTR)argE->text, argE->textlen, | |
334 | argE->startoffset, argE->eflags, ud->match_data, NULL); //### | |
335 | } | |
336 | ||
337 | static int gsub_exec (TPcre2 *ud, TArgExec *argE, int st) { | |
338 | return pcre2_match (ud->pr, (PCRE2_SPTR)argE->text, argE->textlen, | |
339 | st, argE->eflags, ud->match_data, NULL); //### | |
340 | } | |
341 | ||
342 | static int split_exec (TPcre2 *ud, TArgExec *argE, int offset) { | |
343 | return pcre2_match (ud->pr, (PCRE2_SPTR)argE->text, argE->textlen, | |
344 | offset, argE->eflags, ud->match_data, NULL); //### | |
345 | } | |
346 | ||
347 | static int Lpcre2_gc (lua_State *L) { | |
348 | TPcre2 *ud = check_ud (L); | |
349 | if (ud->freed == 0) { /* precaution against "manual" __gc calling */ | |
350 | ud->freed = 1; | |
351 | if (ud->pr) pcre2_code_free (ud->pr); | |
352 | //if (ud->tables) pcre_free ((void *)ud->tables); //### | |
353 | if (ud->ccontext) pcre2_compile_context_free (ud->ccontext); | |
354 | if (ud->match_data) pcre2_match_data_free (ud->match_data); | |
355 | } | |
356 | return 0; | |
357 | } | |
358 | ||
359 | static int Lpcre2_tostring (lua_State *L) { | |
360 | TPcre2 *ud = check_ud (L); | |
361 | if (ud->freed == 0) | |
362 | lua_pushfstring (L, "%s (%p)", REX_TYPENAME, (void*)ud); | |
363 | else | |
364 | lua_pushfstring (L, "%s (deleted)", REX_TYPENAME); | |
365 | return 1; | |
366 | } | |
367 | ||
368 | static int Lpcre2_version (lua_State *L) { | |
369 | char buf[64]; | |
370 | pcre2_config(PCRE2_CONFIG_VERSION, buf); | |
371 | lua_pushstring (L, buf); | |
372 | return 1; | |
373 | } | |
374 | ||
375 | //### TODO: document this method. | |
376 | //### TODO: write tests for this method. | |
377 | static int Lpcre2_jit_compile (lua_State *L) { | |
378 | TPcre2 *ud = check_ud (L); | |
379 | uint32_t options = (uint32_t) luaL_optinteger (L, 2, PCRE2_JIT_COMPLETE); | |
380 | int errcode = pcre2_jit_compile (ud->pr, options); | |
381 | if (errcode == 0) { | |
382 | lua_pushboolean(L, 1); | |
383 | return 1; | |
384 | } | |
385 | lua_pushboolean(L, 0); | |
386 | return 1 + push_error_message(L, errcode); | |
387 | } | |
388 | ||
/* Queries item `what` of ud's pattern into a temporary of type `valtype` and,
 * when pcre2_pattern_info() succeeds, stores it as field `name` (a number) of
 * the table on top of the Lua stack.  Expands to a brace block, so it is used
 * without a trailing semicolon. */
#define SET_INFO_FIELD(L,ud,what,name,valtype) {               \
  valtype info_val_;                                           \
  if (0 == pcre2_pattern_info (ud->pr, what, &info_val_)) {    \
    lua_pushnumber (L, info_val_);                             \
    lua_setfield (L, -2, name);                                \
  }                                                            \
}
396 | ||
397 | static int Lpcre2_pattern_info (lua_State *L) { | |
398 | TPcre2 *ud = check_ud (L); | |
399 | lua_newtable(L); | |
400 | ||
401 | SET_INFO_FIELD (L, ud, PCRE2_INFO_ALLOPTIONS, "ALLOPTIONS", uint32_t) | |
402 | SET_INFO_FIELD (L, ud, PCRE2_INFO_ARGOPTIONS, "ARGOPTIONS", uint32_t) | |
403 | SET_INFO_FIELD (L, ud, PCRE2_INFO_BACKREFMAX, "BACKREFMAX", uint32_t) | |
404 | SET_INFO_FIELD (L, ud, PCRE2_INFO_BSR, "BSR", uint32_t) | |
405 | SET_INFO_FIELD (L, ud, PCRE2_INFO_CAPTURECOUNT, "CAPTURECOUNT", uint32_t) | |
406 | //### SET_INFO_FIELD (L, ud, PCRE2_INFO_FIRSTBITMAP, "FIRSTBITMAP", ???) | |
407 | SET_INFO_FIELD (L, ud, PCRE2_INFO_FIRSTCODETYPE, "FIRSTCODETYPE", uint32_t) | |
408 | SET_INFO_FIELD (L, ud, PCRE2_INFO_FIRSTCODEUNIT, "FIRSTCODEUNIT", uint32_t) | |
409 | SET_INFO_FIELD (L, ud, PCRE2_INFO_HASBACKSLASHC, "HASBACKSLASHC", uint32_t) | |
410 | SET_INFO_FIELD (L, ud, PCRE2_INFO_HASCRORLF, "HASCRORLF", uint32_t) | |
411 | SET_INFO_FIELD (L, ud, PCRE2_INFO_JCHANGED, "JCHANGED", uint32_t) | |
412 | SET_INFO_FIELD (L, ud, PCRE2_INFO_JITSIZE, "JITSIZE", size_t) | |
413 | SET_INFO_FIELD (L, ud, PCRE2_INFO_LASTCODETYPE, "LASTCODETYPE", uint32_t) | |
414 | SET_INFO_FIELD (L, ud, PCRE2_INFO_LASTCODEUNIT, "LASTCODEUNIT", uint32_t) | |
415 | SET_INFO_FIELD (L, ud, PCRE2_INFO_MATCHEMPTY, "MATCHEMPTY", uint32_t) | |
416 | SET_INFO_FIELD (L, ud, PCRE2_INFO_MATCHLIMIT, "MATCHLIMIT", uint32_t) | |
417 | SET_INFO_FIELD (L, ud, PCRE2_INFO_MAXLOOKBEHIND, "MAXLOOKBEHIND", uint32_t) | |
418 | SET_INFO_FIELD (L, ud, PCRE2_INFO_MINLENGTH, "MINLENGTH", uint32_t) | |
419 | SET_INFO_FIELD (L, ud, PCRE2_INFO_NAMECOUNT, "NAMECOUNT", uint32_t) | |
420 | SET_INFO_FIELD (L, ud, PCRE2_INFO_NAMEENTRYSIZE, "NAMEENTRYSIZE", uint32_t) | |
421 | //### SET_INFO_FIELD (L, ud, PCRE2_INFO_NAMETABLE, "NAMETABLE", ???) | |
422 | SET_INFO_FIELD (L, ud, PCRE2_INFO_NEWLINE, "NEWLINE", uint32_t) | |
423 | SET_INFO_FIELD (L, ud, PCRE2_INFO_RECURSIONLIMIT, "RECURSIONLIMIT", uint32_t) | |
424 | SET_INFO_FIELD (L, ud, PCRE2_INFO_SIZE, "SIZE", size_t) | |
425 | ||
426 | return 1; | |
427 | } | |
428 | ||
429 | static const luaL_Reg chartables_meta[] = { | |
430 | { "__gc", chartables_gc }, | |
431 | { "__tostring", chartables_tostring }, | |
432 | { NULL, NULL } | |
433 | }; | |
434 | ||
435 | static const luaL_Reg r_methods[] = { | |
436 | { "exec", algm_exec }, | |
437 | { "tfind", algm_tfind }, /* old name: match */ | |
438 | { "find", algm_find }, | |
439 | { "match", algm_match }, | |
440 | { "dfa_exec", Lpcre2_dfa_exec }, | |
441 | { "patterninfo", Lpcre2_pattern_info }, //### document name change: fullinfo -> patterninfo | |
442 | { "fullinfo", Lpcre2_pattern_info }, //### compatibility name | |
443 | { "jit_compile", Lpcre2_jit_compile }, | |
444 | { "__gc", Lpcre2_gc }, | |
445 | { "__tostring", Lpcre2_tostring }, | |
446 | { NULL, NULL } | |
447 | }; | |
448 | ||
449 | static const luaL_Reg r_functions[] = { | |
450 | { "match", algf_match }, | |
451 | { "find", algf_find }, | |
452 | { "gmatch", algf_gmatch }, | |
453 | { "gsub", algf_gsub }, | |
454 | { "count", algf_count }, | |
455 | { "split", algf_split }, | |
456 | { "new", algf_new }, | |
457 | { "flags", Lpcre2_get_flags }, | |
458 | { "version", Lpcre2_version }, | |
459 | { "maketables", Lpcre2_maketables }, | |
460 | { "config", Lpcre2_config }, | |
461 | { NULL, NULL } | |
462 | }; | |
463 | ||
464 | /* Open the library */ | |
465 | REX_API int REX_OPENLIB (lua_State *L) { | |
466 | char buf_ver[64]; | |
467 | pcre2_config(PCRE2_CONFIG_VERSION, buf_ver); | |
468 | if (PCRE2_MAJOR > atoi (buf_ver)) { | |
469 | return luaL_error (L, "%s requires at least version %d of PCRE2 library", | |
470 | REX_LIBNAME, (int)PCRE2_MAJOR); | |
471 | } | |
472 | ||
473 | alg_register(L, r_methods, r_functions, "PCRE2"); | |
474 | ||
475 | /* create a table and register it as a metatable for "chartables" userdata */ | |
476 | lua_newtable (L); | |
477 | lua_pushliteral (L, "access denied"); | |
478 | lua_setfield (L, -2, "__metatable"); | |
479 | #if LUA_VERSION_NUM == 501 | |
480 | luaL_register (L, NULL, chartables_meta); | |
481 | lua_rawseti (L, LUA_ENVIRONINDEX, INDEX_CHARTABLES_META); | |
482 | #else | |
483 | lua_pushvalue(L, -3); | |
484 | luaL_setfuncs (L, chartables_meta, 1); | |
485 | lua_rawseti (L, -3, INDEX_CHARTABLES_META); | |
486 | #endif | |
487 | ||
488 | /* create a table for connecting "chartables" userdata to "regex" userdata */ | |
489 | lua_newtable (L); | |
490 | lua_pushliteral (L, "k"); /* weak keys */ | |
491 | lua_setfield (L, -2, "__mode"); | |
492 | lua_pushvalue (L, -1); /* setmetatable (tb, tb) */ | |
493 | lua_setmetatable (L, -2); | |
494 | #if LUA_VERSION_NUM == 501 | |
495 | lua_rawseti (L, LUA_ENVIRONINDEX, INDEX_CHARTABLES_LINK); | |
496 | #else | |
497 | lua_rawseti (L, -3, INDEX_CHARTABLES_LINK); | |
498 | #endif | |
499 | ||
500 | return 1; | |
501 | } |
0 | /* lpcre2_f.c - Lua binding of PCRE2 library */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #include <pcre2.h> | |
4 | #include "lua.h" | |
5 | #include "lauxlib.h" | |
6 | #include "../common.h" | |
7 | ||
8 | #define VERSION_PCRE2 (PCRE2_MAJOR*100 + PCRE2_MINOR) | |
9 | ||
10 | static flag_pair pcre2_flags[] = { | |
11 | { "MAJOR", PCRE2_MAJOR }, | |
12 | { "MINOR", PCRE2_MINOR }, | |
13 | /*---------------------------------------------------------------------------*/ | |
14 | { "ANCHORED", PCRE2_ANCHORED }, | |
15 | { "NO_UTF_CHECK", PCRE2_NO_UTF_CHECK }, | |
16 | { "ALLOW_EMPTY_CLASS", PCRE2_ALLOW_EMPTY_CLASS }, | |
17 | { "ALT_BSUX", PCRE2_ALT_BSUX }, | |
18 | { "AUTO_CALLOUT", PCRE2_AUTO_CALLOUT }, | |
19 | { "CASELESS", PCRE2_CASELESS }, | |
20 | { "DOLLAR_ENDONLY", PCRE2_DOLLAR_ENDONLY }, | |
21 | { "DOTALL", PCRE2_DOTALL }, | |
22 | { "DUPNAMES", PCRE2_DUPNAMES }, | |
23 | { "EXTENDED", PCRE2_EXTENDED }, | |
24 | { "FIRSTLINE", PCRE2_FIRSTLINE }, | |
25 | { "MATCH_UNSET_BACKREF", PCRE2_MATCH_UNSET_BACKREF }, | |
26 | { "MULTILINE", PCRE2_MULTILINE }, | |
27 | { "NEVER_UCP", PCRE2_NEVER_UCP }, | |
28 | { "NEVER_UTF", PCRE2_NEVER_UTF }, | |
29 | { "NO_AUTO_CAPTURE", PCRE2_NO_AUTO_CAPTURE }, | |
30 | { "NO_AUTO_POSSESS", PCRE2_NO_AUTO_POSSESS }, | |
31 | { "NO_DOTSTAR_ANCHOR", PCRE2_NO_DOTSTAR_ANCHOR }, | |
32 | { "NO_START_OPTIMIZE", PCRE2_NO_START_OPTIMIZE }, | |
33 | { "UCP", PCRE2_UCP }, | |
34 | { "UNGREEDY", PCRE2_UNGREEDY }, | |
35 | { "UTF", PCRE2_UTF }, | |
36 | { "NEVER_BACKSLASH_C", PCRE2_NEVER_BACKSLASH_C }, | |
37 | { "ALT_CIRCUMFLEX", PCRE2_ALT_CIRCUMFLEX }, | |
38 | { "ALT_VERBNAMES", PCRE2_ALT_VERBNAMES }, | |
39 | { "USE_OFFSET_LIMIT", PCRE2_USE_OFFSET_LIMIT }, | |
40 | { "JIT_COMPLETE", PCRE2_JIT_COMPLETE }, | |
41 | { "JIT_PARTIAL_SOFT", PCRE2_JIT_PARTIAL_SOFT }, | |
42 | { "JIT_PARTIAL_HARD", PCRE2_JIT_PARTIAL_HARD }, | |
43 | { "NOTBOL", PCRE2_NOTBOL }, | |
44 | { "NOTEOL", PCRE2_NOTEOL }, | |
45 | { "NOTEMPTY", PCRE2_NOTEMPTY }, | |
46 | { "NOTEMPTY_ATSTART", PCRE2_NOTEMPTY_ATSTART }, | |
47 | { "PARTIAL_SOFT", PCRE2_PARTIAL_SOFT }, | |
48 | { "PARTIAL_HARD", PCRE2_PARTIAL_HARD }, | |
49 | { "DFA_RESTART", PCRE2_DFA_RESTART }, | |
50 | { "DFA_SHORTEST", PCRE2_DFA_SHORTEST }, | |
51 | { "SUBSTITUTE_GLOBAL", PCRE2_SUBSTITUTE_GLOBAL }, | |
52 | { "SUBSTITUTE_EXTENDED", PCRE2_SUBSTITUTE_EXTENDED }, | |
53 | { "SUBSTITUTE_UNSET_EMPTY", PCRE2_SUBSTITUTE_UNSET_EMPTY }, | |
54 | { "SUBSTITUTE_UNKNOWN_UNSET", PCRE2_SUBSTITUTE_UNKNOWN_UNSET }, | |
55 | { "SUBSTITUTE_OVERFLOW_LENGTH", PCRE2_SUBSTITUTE_OVERFLOW_LENGTH }, | |
56 | #ifdef PCRE2_NO_JIT | |
57 | { "NO_JIT", PCRE2_NO_JIT }, | |
58 | #endif | |
59 | { "NEWLINE_CR", PCRE2_NEWLINE_CR }, | |
60 | { "NEWLINE_LF", PCRE2_NEWLINE_LF }, | |
61 | { "NEWLINE_CRLF", PCRE2_NEWLINE_CRLF }, | |
62 | { "NEWLINE_ANY", PCRE2_NEWLINE_ANY }, | |
63 | { "NEWLINE_ANYCRLF", PCRE2_NEWLINE_ANYCRLF }, | |
64 | { "BSR_UNICODE", PCRE2_BSR_UNICODE }, | |
65 | { "BSR_ANYCRLF", PCRE2_BSR_ANYCRLF }, | |
66 | /*---------------------------------------------------------------------------*/ | |
67 | { "INFO_ALLOPTIONS", PCRE2_INFO_ALLOPTIONS }, | |
68 | { "INFO_ARGOPTIONS", PCRE2_INFO_ARGOPTIONS }, | |
69 | { "INFO_BACKREFMAX", PCRE2_INFO_BACKREFMAX }, | |
70 | { "INFO_BSR", PCRE2_INFO_BSR }, | |
71 | { "INFO_CAPTURECOUNT", PCRE2_INFO_CAPTURECOUNT }, | |
72 | { "INFO_FIRSTCODEUNIT", PCRE2_INFO_FIRSTCODEUNIT }, | |
73 | { "INFO_FIRSTCODETYPE", PCRE2_INFO_FIRSTCODETYPE }, | |
74 | { "INFO_FIRSTBITMAP", PCRE2_INFO_FIRSTBITMAP }, | |
75 | { "INFO_HASCRORLF", PCRE2_INFO_HASCRORLF }, | |
76 | { "INFO_JCHANGED", PCRE2_INFO_JCHANGED }, | |
77 | { "INFO_JITSIZE", PCRE2_INFO_JITSIZE }, | |
78 | { "INFO_LASTCODEUNIT", PCRE2_INFO_LASTCODEUNIT }, | |
79 | { "INFO_LASTCODETYPE", PCRE2_INFO_LASTCODETYPE }, | |
80 | { "INFO_MATCHEMPTY", PCRE2_INFO_MATCHEMPTY }, | |
81 | { "INFO_MATCHLIMIT", PCRE2_INFO_MATCHLIMIT }, | |
82 | { "INFO_MAXLOOKBEHIND", PCRE2_INFO_MAXLOOKBEHIND }, | |
83 | { "INFO_MINLENGTH", PCRE2_INFO_MINLENGTH }, | |
84 | { "INFO_NAMECOUNT", PCRE2_INFO_NAMECOUNT }, | |
85 | { "INFO_NAMEENTRYSIZE", PCRE2_INFO_NAMEENTRYSIZE }, | |
86 | { "INFO_NAMETABLE", PCRE2_INFO_NAMETABLE }, | |
87 | { "INFO_NEWLINE", PCRE2_INFO_NEWLINE }, | |
88 | { "INFO_RECURSIONLIMIT", PCRE2_INFO_RECURSIONLIMIT }, | |
89 | { "INFO_SIZE", PCRE2_INFO_SIZE }, | |
90 | { "INFO_HASBACKSLASHC", PCRE2_INFO_HASBACKSLASHC }, | |
91 | /*---------------------------------------------------------------------------*/ | |
92 | { NULL, 0 } | |
93 | }; | |
94 | ||
95 | flag_pair pcre2_error_flags[] = { | |
96 | { "ERROR_NOMATCH", PCRE2_ERROR_NOMATCH }, | |
97 | { "ERROR_PARTIAL", PCRE2_ERROR_PARTIAL }, | |
98 | { "ERROR_UTF8_ERR1", PCRE2_ERROR_UTF8_ERR1 }, | |
99 | { "ERROR_UTF8_ERR2", PCRE2_ERROR_UTF8_ERR2 }, | |
100 | { "ERROR_UTF8_ERR3", PCRE2_ERROR_UTF8_ERR3 }, | |
101 | { "ERROR_UTF8_ERR4", PCRE2_ERROR_UTF8_ERR4 }, | |
102 | { "ERROR_UTF8_ERR5", PCRE2_ERROR_UTF8_ERR5 }, | |
103 | { "ERROR_UTF8_ERR6", PCRE2_ERROR_UTF8_ERR6 }, | |
104 | { "ERROR_UTF8_ERR7", PCRE2_ERROR_UTF8_ERR7 }, | |
105 | { "ERROR_UTF8_ERR8", PCRE2_ERROR_UTF8_ERR8 }, | |
106 | { "ERROR_UTF8_ERR9", PCRE2_ERROR_UTF8_ERR9 }, | |
107 | { "ERROR_UTF8_ERR10", PCRE2_ERROR_UTF8_ERR10 }, | |
108 | { "ERROR_UTF8_ERR11", PCRE2_ERROR_UTF8_ERR11 }, | |
109 | { "ERROR_UTF8_ERR12", PCRE2_ERROR_UTF8_ERR12 }, | |
110 | { "ERROR_UTF8_ERR13", PCRE2_ERROR_UTF8_ERR13 }, | |
111 | { "ERROR_UTF8_ERR14", PCRE2_ERROR_UTF8_ERR14 }, | |
112 | { "ERROR_UTF8_ERR15", PCRE2_ERROR_UTF8_ERR15 }, | |
113 | { "ERROR_UTF8_ERR16", PCRE2_ERROR_UTF8_ERR16 }, | |
114 | { "ERROR_UTF8_ERR17", PCRE2_ERROR_UTF8_ERR17 }, | |
115 | { "ERROR_UTF8_ERR18", PCRE2_ERROR_UTF8_ERR18 }, | |
116 | { "ERROR_UTF8_ERR19", PCRE2_ERROR_UTF8_ERR19 }, | |
117 | { "ERROR_UTF8_ERR20", PCRE2_ERROR_UTF8_ERR20 }, | |
118 | { "ERROR_UTF8_ERR21", PCRE2_ERROR_UTF8_ERR21 }, | |
119 | { "ERROR_UTF16_ERR1", PCRE2_ERROR_UTF16_ERR1 }, | |
120 | { "ERROR_UTF16_ERR2", PCRE2_ERROR_UTF16_ERR2 }, | |
121 | { "ERROR_UTF16_ERR3", PCRE2_ERROR_UTF16_ERR3 }, | |
122 | { "ERROR_UTF32_ERR1", PCRE2_ERROR_UTF32_ERR1 }, | |
123 | { "ERROR_UTF32_ERR2", PCRE2_ERROR_UTF32_ERR2 }, | |
124 | { "ERROR_BADDATA", PCRE2_ERROR_BADDATA }, | |
125 | { "ERROR_MIXEDTABLES", PCRE2_ERROR_MIXEDTABLES }, | |
126 | { "ERROR_BADMAGIC", PCRE2_ERROR_BADMAGIC }, | |
127 | { "ERROR_BADMODE", PCRE2_ERROR_BADMODE }, | |
128 | { "ERROR_BADOFFSET", PCRE2_ERROR_BADOFFSET }, | |
129 | { "ERROR_BADOPTION", PCRE2_ERROR_BADOPTION }, | |
130 | { "ERROR_BADREPLACEMENT", PCRE2_ERROR_BADREPLACEMENT }, | |
131 | { "ERROR_BADUTFOFFSET", PCRE2_ERROR_BADUTFOFFSET }, | |
132 | { "ERROR_CALLOUT", PCRE2_ERROR_CALLOUT }, | |
133 | { "ERROR_DFA_BADRESTART", PCRE2_ERROR_DFA_BADRESTART }, | |
134 | { "ERROR_DFA_RECURSE", PCRE2_ERROR_DFA_RECURSE }, | |
135 | { "ERROR_DFA_UCOND", PCRE2_ERROR_DFA_UCOND }, | |
136 | { "ERROR_DFA_UFUNC", PCRE2_ERROR_DFA_UFUNC }, | |
137 | { "ERROR_DFA_UITEM", PCRE2_ERROR_DFA_UITEM }, | |
138 | { "ERROR_DFA_WSSIZE", PCRE2_ERROR_DFA_WSSIZE }, | |
139 | { "ERROR_INTERNAL", PCRE2_ERROR_INTERNAL }, | |
140 | { "ERROR_JIT_BADOPTION", PCRE2_ERROR_JIT_BADOPTION }, | |
141 | { "ERROR_JIT_STACKLIMIT", PCRE2_ERROR_JIT_STACKLIMIT }, | |
142 | { "ERROR_MATCHLIMIT", PCRE2_ERROR_MATCHLIMIT }, | |
143 | { "ERROR_NOMEMORY", PCRE2_ERROR_NOMEMORY }, | |
144 | { "ERROR_NOSUBSTRING", PCRE2_ERROR_NOSUBSTRING }, | |
145 | { "ERROR_NOUNIQUESUBSTRING", PCRE2_ERROR_NOUNIQUESUBSTRING }, | |
146 | { "ERROR_NULL", PCRE2_ERROR_NULL }, | |
147 | { "ERROR_RECURSELOOP", PCRE2_ERROR_RECURSELOOP }, | |
148 | { "ERROR_RECURSIONLIMIT", PCRE2_ERROR_RECURSIONLIMIT }, | |
149 | { "ERROR_UNAVAILABLE", PCRE2_ERROR_UNAVAILABLE }, | |
150 | { "ERROR_UNSET", PCRE2_ERROR_UNSET }, | |
151 | { "ERROR_BADOFFSETLIMIT", PCRE2_ERROR_BADOFFSETLIMIT }, | |
152 | { "ERROR_BADREPESCAPE", PCRE2_ERROR_BADREPESCAPE }, | |
153 | { "ERROR_REPMISSINGBRACE", PCRE2_ERROR_REPMISSINGBRACE }, | |
154 | { "ERROR_BADSUBSTITUTION", PCRE2_ERROR_BADSUBSTITUTION }, | |
155 | { "ERROR_BADSUBSPATTERN", PCRE2_ERROR_BADSUBSPATTERN }, | |
156 | { "ERROR_TOOMANYREPLACE", PCRE2_ERROR_TOOMANYREPLACE }, | |
157 | #ifdef PCRE2_ERROR_BADSERIALIZEDDATA | |
158 | { "ERROR_BADSERIALIZEDDATA", PCRE2_ERROR_BADSERIALIZEDDATA }, | |
159 | #endif | |
160 | /*---------------------------------------------------------------------------*/ | |
161 | { NULL, 0 } | |
162 | }; | |
163 | ||
164 | static flag_pair pcre2_config_flags[] = { | |
165 | { "PCRE2_CONFIG_BSR", PCRE2_CONFIG_BSR }, | |
166 | { "PCRE2_CONFIG_JIT", PCRE2_CONFIG_JIT }, | |
167 | { "PCRE2_CONFIG_JITTARGET", PCRE2_CONFIG_JITTARGET }, | |
168 | { "PCRE2_CONFIG_LINKSIZE", PCRE2_CONFIG_LINKSIZE }, | |
169 | { "PCRE2_CONFIG_MATCHLIMIT", PCRE2_CONFIG_MATCHLIMIT }, | |
170 | { "PCRE2_CONFIG_NEWLINE", PCRE2_CONFIG_NEWLINE }, | |
171 | { "PCRE2_CONFIG_PARENSLIMIT", PCRE2_CONFIG_PARENSLIMIT }, | |
172 | { "PCRE2_CONFIG_RECURSIONLIMIT", PCRE2_CONFIG_RECURSIONLIMIT }, | |
173 | { "PCRE2_CONFIG_STACKRECURSE", PCRE2_CONFIG_STACKRECURSE }, | |
174 | { "PCRE2_CONFIG_UNICODE", PCRE2_CONFIG_UNICODE }, | |
175 | { "PCRE2_CONFIG_UNICODE_VERSION", PCRE2_CONFIG_UNICODE_VERSION }, | |
176 | { "PCRE2_CONFIG_VERSION", PCRE2_CONFIG_VERSION }, | |
177 | /*---------------------------------------------------------------------------*/ | |
178 | { NULL, 0 } | |
179 | }; | |
180 | ||
181 | int Lpcre2_config (lua_State *L) { | |
182 | flag_pair *fp; | |
183 | if (lua_istable (L, 1)) | |
184 | lua_settop (L, 1); | |
185 | else | |
186 | lua_newtable (L); | |
187 | for (fp = pcre2_config_flags; fp->key; ++fp) { | |
188 | if (fp->val == PCRE2_CONFIG_JITTARGET) { | |
189 | #if PCRE2_CODE_UNIT_WIDTH == 8 | |
190 | char buf[64]; | |
191 | if (PCRE2_ERROR_BADOPTION != pcre2_config (fp->val, buf)) { | |
192 | lua_pushstring (L, buf); | |
193 | lua_setfield (L, -2, fp->key); | |
194 | } | |
195 | #endif | |
196 | } | |
197 | else { | |
198 | int val; | |
199 | if (0 == pcre2_config (fp->val, &val)) { | |
200 | lua_pushinteger (L, val); | |
201 | lua_setfield (L, -2, fp->key); | |
202 | } | |
203 | } | |
204 | } | |
205 | return 1; | |
206 | } | |
207 | ||
208 | int Lpcre2_get_flags (lua_State *L) { | |
209 | const flag_pair* fps[] = { pcre2_flags, pcre2_error_flags, NULL }; | |
210 | return get_flags (L, fps); | |
211 | } | |
212 |
0 | /* lposix.c - Lua binding of POSIX regular expressions library */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #include <stdlib.h> | |
4 | #include <string.h> | |
5 | #include <ctype.h> | |
6 | #include "lua.h" | |
7 | #include "lauxlib.h" | |
8 | #include "../common.h" | |
9 | ||
10 | #ifndef REX_POSIX_INCLUDE | |
11 | # include <regex.h> | |
12 | #else | |
13 | # include REX_POSIX_INCLUDE | |
14 | #endif | |
15 | ||
16 | /* These 2 settings may be redefined from the command-line or the makefile. | |
17 | * They should be kept in sync between themselves and with the target name. | |
18 | */ | |
19 | #ifndef REX_LIBNAME | |
20 | # define REX_LIBNAME "rex_posix" | |
21 | #endif | |
22 | #ifndef REX_OPENLIB | |
23 | # define REX_OPENLIB luaopen_rex_posix | |
24 | #endif | |
25 | ||
26 | #define REX_TYPENAME REX_LIBNAME"_regex" | |
27 | ||
28 | /* Test if regex.h corresponds to the extended POSIX library, i.e. H. Spencer's. | |
29 | This test may not work as intended if regex.h introduced REG_BASIC, etc. | |
30 | via enum rather than #define. | |
31 | If that's the case, add -DREX_POSIX_EXT in the makefile/command line. | |
32 | The same applies to REG_STARTEND. | |
33 | */ | |
34 | #ifndef REX_POSIX_EXT | |
35 | # if defined(REG_BASIC) && defined(REG_STARTEND) | |
36 | # define REX_POSIX_EXT | |
37 | # endif | |
38 | #endif | |
39 | ||
40 | #define ALG_CFLAGS_DFLT REG_EXTENDED | |
41 | #ifdef REG_STARTEND | |
42 | # define ALG_EFLAGS_DFLT REG_STARTEND | |
43 | #else | |
44 | # define ALG_EFLAGS_DFLT 0 | |
45 | #endif | |
46 | ||
47 | #define ALG_NOMATCH(res) ((res) == REG_NOMATCH) | |
48 | #define ALG_ISMATCH(res) ((res) == 0) | |
49 | #define ALG_SUBBEG(ud,n) ud->match[n].rm_so | |
50 | #define ALG_SUBEND(ud,n) ud->match[n].rm_eo | |
51 | #define ALG_SUBLEN(ud,n) (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n)) | |
52 | #define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0) | |
53 | #ifdef REX_NSUB_BASE1 | |
54 | # define ALG_NSUB(ud) ((int)ud->r.re_nsub - 1) | |
55 | #else | |
56 | # define ALG_NSUB(ud) ((int)ud->r.re_nsub) | |
57 | #endif | |
58 | ||
59 | #define ALG_PUSHSUB(L,ud,text,n) \ | |
60 | lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n)) | |
61 | ||
62 | #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ | |
63 | (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
64 | ||
65 | #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1) | |
66 | #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n)) | |
67 | #define ALG_PUSHOFFSETS(L,ud,offs,n) \ | |
68 | (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n)) | |
69 | ||
70 | #define ALG_BASE(st) (st) | |
71 | #define ALG_GETCFLAGS(L,pos) (int)luaL_optinteger(L, pos, ALG_CFLAGS_DFLT) | |
72 | ||
/* Userdata payload of a compiled POSIX regex object. */
typedef struct {
  regex_t      r;      /* pattern compiled by regcomp() in compile_regex() */
  regmatch_t  *match;  /* ALG_NSUB(ud) + 1 match slots, allocated with Lmalloc() */
  int          freed;  /* set to 1 by Posix_gc() once the resources are released */
} TPosix;

/* Name under which the shared code in algo.h refers to the userdata type. */
#define TUserdata TPosix
80 | ||
81 | #include "../algo.h" | |
82 | ||
83 | /* Functions | |
84 | ****************************************************************************** | |
85 | */ | |
86 | ||
87 | static int generate_error (lua_State *L, const TPosix *ud, int errcode) { | |
88 | char errbuf[80]; | |
89 | regerror (errcode, &ud->r, errbuf, sizeof (errbuf)); | |
90 | return luaL_error (L, "%s", errbuf); | |
91 | } | |
92 | ||
93 | static int compile_regex (lua_State *L, const TArgComp *argC, TPosix **pud) { | |
94 | int res; | |
95 | TPosix *ud; | |
96 | ||
97 | ud = (TPosix *)lua_newuserdata (L, sizeof (TPosix)); | |
98 | memset (ud, 0, sizeof (TPosix)); /* initialize all members to 0 */ | |
99 | ||
100 | #ifdef REX_POSIX_EXT | |
101 | if (argC->cflags & REG_PEND) | |
102 | ud->r.re_endp = argC->pattern + argC->patlen; | |
103 | #endif | |
104 | ||
105 | res = regcomp (&ud->r, argC->pattern, argC->cflags); | |
106 | if (res != 0) | |
107 | return generate_error (L, ud, res); | |
108 | ||
109 | if (argC->cflags & REG_NOSUB) | |
110 | ud->r.re_nsub = 0; | |
111 | ud->match = (regmatch_t *) Lmalloc (L, (ALG_NSUB(ud) + 1) * sizeof (regmatch_t)); | |
112 | if (!ud->match) | |
113 | luaL_error (L, "malloc failed"); | |
114 | lua_pushvalue (L, ALG_ENVIRONINDEX); | |
115 | lua_setmetatable (L, -2); | |
116 | ||
117 | if (pud) *pud = ud; | |
118 | return 1; | |
119 | } | |
120 | ||
121 | static int gmatch_exec (TUserdata *ud, TArgExec *argE) { | |
122 | if (argE->startoffset > 0) | |
123 | argE->eflags |= REG_NOTBOL; | |
124 | ||
125 | #ifdef REG_STARTEND | |
126 | if (argE->eflags & REG_STARTEND) { | |
127 | ALG_SUBBEG(ud,0) = 0; | |
128 | ALG_SUBEND(ud,0) = argE->textlen - argE->startoffset; | |
129 | } | |
130 | #endif | |
131 | ||
132 | argE->text += argE->startoffset; | |
133 | return regexec (&ud->r, argE->text, ALG_NSUB(ud) + 1, ud->match, argE->eflags); | |
134 | } | |
135 | ||
136 | static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { | |
137 | #ifdef REG_STARTEND | |
138 | if (argE->eflags & REG_STARTEND) | |
139 | lua_pushlstring (L, argE->text, argE->textlen); | |
140 | else | |
141 | #endif | |
142 | lua_pushstring (L, argE->text); | |
143 | } | |
144 | ||
145 | static int findmatch_exec (TPosix *ud, TArgExec *argE) { | |
146 | #ifdef REG_STARTEND | |
147 | if (argE->eflags & REG_STARTEND) { | |
148 | ud->match[0].rm_so = argE->startoffset; | |
149 | ud->match[0].rm_eo = argE->textlen; | |
150 | argE->startoffset = 0; | |
151 | } | |
152 | else | |
153 | #endif | |
154 | argE->text += argE->startoffset; | |
155 | return regexec (&ud->r, argE->text, ALG_NSUB(ud) + 1, ud->match, argE->eflags); | |
156 | } | |
157 | ||
158 | static int gsub_exec (TPosix *ud, TArgExec *argE, int st) { | |
159 | #ifdef REG_STARTEND | |
160 | if(argE->eflags & REG_STARTEND) { | |
161 | ALG_SUBBEG(ud,0) = 0; | |
162 | ALG_SUBEND(ud,0) = argE->textlen - st; | |
163 | } | |
164 | #endif | |
165 | if (st > 0) | |
166 | argE->eflags |= REG_NOTBOL; | |
167 | return regexec (&ud->r, argE->text+st, ALG_NSUB(ud)+1, ud->match, argE->eflags); | |
168 | } | |
169 | ||
170 | static int split_exec (TPosix *ud, TArgExec *argE, int offset) { | |
171 | #ifdef REG_STARTEND | |
172 | if (argE->eflags & REG_STARTEND) { | |
173 | ALG_SUBBEG(ud,0) = 0; | |
174 | ALG_SUBEND(ud,0) = argE->textlen - offset; | |
175 | } | |
176 | #endif | |
177 | if (offset > 0) | |
178 | argE->eflags |= REG_NOTBOL; | |
179 | ||
180 | return regexec (&ud->r, argE->text + offset, ALG_NSUB(ud) + 1, ud->match, argE->eflags); | |
181 | } | |
182 | ||
183 | static int Posix_gc (lua_State *L) { | |
184 | TPosix *ud = check_ud (L); | |
185 | if (ud->freed == 0) { /* precaution against "manual" __gc calling */ | |
186 | ud->freed = 1; | |
187 | regfree (&ud->r); | |
188 | Lfree (L, ud->match, (ALG_NSUB(ud) + 1) * sizeof (regmatch_t)); | |
189 | } | |
190 | return 0; | |
191 | } | |
192 | ||
193 | static int Posix_tostring (lua_State *L) { | |
194 | TPosix *ud = check_ud (L); | |
195 | if (ud->freed == 0) | |
196 | lua_pushfstring (L, "%s (%p)", REX_TYPENAME, (void*)ud); | |
197 | else | |
198 | lua_pushfstring (L, "%s (deleted)", REX_TYPENAME); | |
199 | return 1; | |
200 | } | |
201 | ||
202 | static flag_pair posix_flags[] = | |
203 | { | |
204 | #ifdef REX_POSIX_EXT | |
205 | { "BASIC", REG_BASIC }, | |
206 | { "NOSPEC", REG_NOSPEC }, | |
207 | { "PEND", REG_PEND }, | |
208 | #endif | |
209 | #ifdef REG_STARTEND | |
210 | { "STARTEND", REG_STARTEND }, | |
211 | #endif | |
212 | { "EXTENDED", REG_EXTENDED }, | |
213 | { "ICASE", REG_ICASE }, | |
214 | { "NOSUB", REG_NOSUB }, | |
215 | { "NEWLINE", REG_NEWLINE }, | |
216 | { "NOTBOL", REG_NOTBOL }, | |
217 | { "NOTEOL", REG_NOTEOL }, | |
218 | /*---------------------------------------------------------------------------*/ | |
219 | { NULL, 0 } | |
220 | }; | |
221 | ||
222 | static flag_pair posix_error_flags[] = { | |
223 | { "NOMATCH", REG_NOMATCH }, | |
224 | { "BADPAT", REG_BADPAT }, | |
225 | { "ECOLLATE", REG_ECOLLATE }, | |
226 | { "ECTYPE", REG_ECTYPE }, | |
227 | { "EESCAPE", REG_EESCAPE }, | |
228 | { "ESUBREG", REG_ESUBREG }, | |
229 | { "EBRACK", REG_EBRACK }, | |
230 | { "EPAREN", REG_EPAREN }, | |
231 | { "EBRACE", REG_EBRACE }, | |
232 | { "BADBR", REG_BADBR }, | |
233 | { "ERANGE", REG_ERANGE }, | |
234 | { "ESPACE", REG_ESPACE }, | |
235 | { "BADRPT", REG_BADRPT }, | |
236 | #ifdef REX_POSIX_EXT | |
237 | { "EMPTY", REG_EMPTY }, | |
238 | { "ASSERT", REG_ASSERT }, | |
239 | { "INVARG", REG_INVARG }, | |
240 | #endif | |
241 | /*---------------------------------------------------------------------------*/ | |
242 | { NULL, 0 } | |
243 | }; | |
244 | ||
245 | static int Posix_get_flags (lua_State *L) { | |
246 | const flag_pair* fps[] = { posix_flags, posix_error_flags, NULL }; | |
247 | return get_flags (L, fps); | |
248 | } | |
249 | ||
250 | static const luaL_Reg r_methods[] = { | |
251 | { "exec", algm_exec }, | |
252 | { "tfind", algm_tfind }, /* old match */ | |
253 | { "find", algm_find }, | |
254 | { "match", algm_match }, | |
255 | { "__gc", Posix_gc }, | |
256 | { "__tostring", Posix_tostring }, | |
257 | { NULL, NULL} | |
258 | }; | |
259 | ||
260 | static const luaL_Reg r_functions[] = { | |
261 | { "match", algf_match }, | |
262 | { "find", algf_find }, | |
263 | { "gmatch", algf_gmatch }, | |
264 | { "gsub", algf_gsub }, | |
265 | { "count", algf_count }, | |
266 | { "split", algf_split }, | |
267 | { "new", algf_new }, | |
268 | { "flags", Posix_get_flags }, | |
269 | { NULL, NULL } | |
270 | }; | |
271 | ||
272 | /* Open the library */ | |
273 | REX_API int REX_OPENLIB (lua_State *L) | |
274 | { | |
275 | alg_register(L, r_methods, r_functions, "POSIX regexes"); | |
276 | return 1; | |
277 | } |
0 | /* ltre.c - Lua binding of TRE regular expressions library */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #include <stdlib.h> | |
4 | #include <string.h> | |
5 | #include <ctype.h> | |
6 | #include "lua.h" | |
7 | #include "lauxlib.h" | |
8 | #include "../common.h" | |
9 | extern void add_wide_lib (lua_State *L); | |
10 | ||
11 | #include <tre/tre.h> | |
12 | ||
13 | /* These 2 settings may be redefined from the command-line or the makefile. | |
14 | * They should be kept in sync between themselves and with the target name. | |
15 | */ | |
16 | #ifndef REX_LIBNAME | |
17 | # define REX_LIBNAME "rex_tre" | |
18 | #endif | |
19 | #ifndef REX_OPENLIB | |
20 | # define REX_OPENLIB luaopen_rex_tre | |
21 | #endif | |
22 | ||
23 | #define REX_TYPENAME REX_LIBNAME"_regex" | |
24 | ||
25 | #define ALG_CFLAGS_DFLT REG_EXTENDED | |
26 | #define ALG_EFLAGS_DFLT 0 | |
27 | ||
28 | #define ALG_NOMATCH(res) ((res) == REG_NOMATCH) | |
29 | #define ALG_ISMATCH(res) ((res) == 0) | |
30 | #define ALG_SUBBEG(ud,n) ud->match[n].rm_so | |
31 | #define ALG_SUBEND(ud,n) ud->match[n].rm_eo | |
32 | #define ALG_SUBLEN(ud,n) (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n)) | |
33 | #define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0) | |
34 | #define ALG_NSUB(ud) ((int)ud->r.re_nsub) | |
35 | ||
36 | #define ALG_PUSHSUB(L,ud,text,n) \ | |
37 | lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n)) | |
38 | ||
39 | #define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ | |
40 | (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) | |
41 | ||
42 | #define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1) | |
43 | #define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n)) | |
44 | #define ALG_PUSHOFFSETS(L,ud,offs,n) \ | |
45 | (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n)) | |
46 | ||
47 | #define ALG_BASE(st) (st) | |
48 | #define ALG_GETCFLAGS(L,pos) (int)luaL_optinteger(L, pos, ALG_CFLAGS_DFLT) | |
49 | ||
50 | typedef struct { | |
51 | regex_t r; | |
52 | regmatch_t * match; | |
53 | int freed; | |
54 | } TPosix; | |
55 | ||
56 | #define TUserdata TPosix | |
57 | ||
58 | #include "../algo.h" | |
59 | ||
60 | /* Functions | |
61 | ****************************************************************************** | |
62 | */ | |
63 | ||
64 | static void checkarg_regaparams (lua_State *L, int stackpos, regaparams_t *argP) { | |
65 | if (lua_type (L, stackpos) != LUA_TTABLE) /* allow for userdata? */ | |
66 | luaL_argerror (L, stackpos, "table expected"); | |
67 | lua_pushvalue (L, stackpos); | |
68 | argP->cost_ins = get_int_field (L, "cost_ins"); | |
69 | argP->cost_del = get_int_field (L, "cost_del"); | |
70 | argP->cost_subst = get_int_field (L, "cost_subst"); | |
71 | argP->max_cost = get_int_field (L, "max_cost"); | |
72 | argP->max_ins = get_int_field (L, "max_ins"); | |
73 | argP->max_del = get_int_field (L, "max_del"); | |
74 | argP->max_subst = get_int_field (L, "max_subst"); | |
75 | argP->max_err = get_int_field (L, "max_err"); | |
76 | lua_pop (L, 1); | |
77 | } | |
78 | ||
79 | /* method r:atfind (s, params, [st], [ef]) */ | |
80 | /* method r:aexec (s, params, [st], [ef]) */ | |
81 | static void checkarg_atfind (lua_State *L, TArgExec *argE, TPosix **ud, | |
82 | regaparams_t *argP) { | |
83 | *ud = check_ud (L); | |
84 | argE->text = luaL_checklstring (L, 2, &argE->textlen); | |
85 | checkarg_regaparams (L, 3, argP); | |
86 | argE->startoffset = get_startoffset (L, 4, argE->textlen); | |
87 | argE->eflags = (int)luaL_optinteger (L, 5, ALG_EFLAGS_DFLT); | |
88 | } | |
89 | ||
90 | static int generate_error (lua_State *L, const TPosix *ud, int errcode) { | |
91 | char errbuf[80]; | |
92 | tre_regerror (errcode, &ud->r, errbuf, sizeof (errbuf)); | |
93 | return luaL_error (L, "%s", errbuf); | |
94 | } | |
95 | ||
96 | static int compile_regex (lua_State *L, const TArgComp *argC, TPosix **pud) { | |
97 | int res; | |
98 | TPosix *ud; | |
99 | ||
100 | ud = (TPosix *)lua_newuserdata (L, sizeof (TPosix)); | |
101 | memset (ud, 0, sizeof (TPosix)); /* initialize all members to 0 */ | |
102 | ||
103 | res = tre_regncomp (&ud->r, argC->pattern, argC->patlen, argC->cflags); | |
104 | if (res != 0) | |
105 | return generate_error (L, ud, res); | |
106 | ||
107 | if (argC->cflags & REG_NOSUB) | |
108 | ud->r.re_nsub = 0; | |
109 | ud->match = (regmatch_t *) Lmalloc (L, (ALG_NSUB(ud) + 1) * sizeof (regmatch_t)); | |
110 | if (!ud->match) | |
111 | luaL_error (L, "malloc failed"); | |
112 | lua_pushvalue (L, ALG_ENVIRONINDEX); | |
113 | lua_setmetatable (L, -2); | |
114 | ||
115 | if (pud) *pud = ud; | |
116 | return 1; | |
117 | } | |
118 | ||
119 | static int generic_atfind (lua_State *L, int tfind) { | |
120 | int res; | |
121 | TArgExec argE; | |
122 | TPosix *ud; | |
123 | regaparams_t argP; | |
124 | regamatch_t res_match; | |
125 | ||
126 | checkarg_atfind (L, &argE, &ud, &argP); | |
127 | if (argE.startoffset > (int)argE.textlen) | |
128 | return lua_pushnil(L), 1; | |
129 | ||
130 | argE.text += argE.startoffset; | |
131 | res_match.nmatch = ALG_NSUB(ud) + 1; | |
132 | res_match.pmatch = ud->match; | |
133 | ||
134 | /* execute the search */ | |
135 | res = tre_reganexec (&ud->r, argE.text, argE.textlen - argE.startoffset, | |
136 | &res_match, argP, argE.eflags); | |
137 | if (ALG_ISMATCH (res)) { | |
138 | ALG_PUSHOFFSETS (L, ud, argE.startoffset, 0); | |
139 | if (tfind) | |
140 | push_substring_table (L, ud, argE.text); | |
141 | else | |
142 | push_offset_table (L, ud, argE.startoffset); | |
143 | /* set values in the dictionary part of the table */ | |
144 | set_int_field (L, "cost", res_match.cost); | |
145 | set_int_field (L, "num_ins", res_match.num_ins); | |
146 | set_int_field (L, "num_del", res_match.num_del); | |
147 | set_int_field (L, "num_subst", res_match.num_subst); | |
148 | return 3; | |
149 | } | |
150 | else if (ALG_NOMATCH (res)) | |
151 | return lua_pushnil (L), 1; | |
152 | else | |
153 | return generate_error (L, ud, res); | |
154 | } | |
155 | ||
156 | static int Ltre_atfind (lua_State *L) { | |
157 | return generic_atfind (L, 1); | |
158 | } | |
159 | ||
160 | static int Ltre_aexec (lua_State *L) { | |
161 | return generic_atfind (L, 0); | |
162 | } | |
163 | ||
164 | static int gmatch_exec (TUserdata *ud, TArgExec *argE) { | |
165 | if (argE->startoffset > 0) | |
166 | argE->eflags |= REG_NOTBOL; | |
167 | argE->text += argE->startoffset; | |
168 | return tre_regnexec (&ud->r, argE->text, argE->textlen - argE->startoffset, | |
169 | ALG_NSUB(ud) + 1, ud->match, argE->eflags); | |
170 | } | |
171 | ||
172 | static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { | |
173 | lua_pushlstring (L, argE->text, argE->textlen); | |
174 | } | |
175 | ||
176 | static int findmatch_exec (TPosix *ud, TArgExec *argE) { | |
177 | argE->text += argE->startoffset; | |
178 | return tre_regnexec (&ud->r, argE->text, argE->textlen - argE->startoffset, | |
179 | ALG_NSUB(ud) + 1, ud->match, argE->eflags); | |
180 | } | |
181 | ||
182 | static int gsub_exec (TPosix *ud, TArgExec *argE, int st) { | |
183 | if (st > 0) | |
184 | argE->eflags |= REG_NOTBOL; | |
185 | return tre_regnexec (&ud->r, argE->text+st, argE->textlen-st, ALG_NSUB(ud)+1, | |
186 | ud->match, argE->eflags); | |
187 | } | |
188 | ||
189 | static int split_exec (TPosix *ud, TArgExec *argE, int offset) { | |
190 | if (offset > 0) | |
191 | argE->eflags |= REG_NOTBOL; | |
192 | return tre_regnexec (&ud->r, argE->text + offset, argE->textlen - offset, | |
193 | ALG_NSUB(ud) + 1, ud->match, argE->eflags); | |
194 | } | |
195 | ||
196 | static int Ltre_have_backrefs (lua_State *L) { | |
197 | TPosix *ud = check_ud (L); | |
198 | lua_pushboolean (L, tre_have_backrefs (&ud->r)); | |
199 | return 1; | |
200 | } | |
201 | ||
202 | static int Ltre_have_approx (lua_State *L) { | |
203 | TPosix *ud = check_ud (L); | |
204 | lua_pushboolean (L, tre_have_approx (&ud->r)); | |
205 | return 1; | |
206 | } | |
207 | ||
208 | static int Ltre_gc (lua_State *L) { | |
209 | TPosix *ud = check_ud (L); | |
210 | if (ud->freed == 0) { /* precaution against "manual" __gc calling */ | |
211 | ud->freed = 1; | |
212 | tre_regfree (&ud->r); | |
213 | Lfree (L, ud->match, (ALG_NSUB(ud) + 1) * sizeof (regmatch_t)); | |
214 | } | |
215 | return 0; | |
216 | } | |
217 | ||
218 | static int Ltre_tostring (lua_State *L) { | |
219 | TPosix *ud = check_ud (L); | |
220 | if (ud->freed == 0) | |
221 | lua_pushfstring (L, "%s (%p)", REX_TYPENAME, (void*)ud); | |
222 | else | |
223 | lua_pushfstring (L, "%s (deleted)", REX_TYPENAME); | |
224 | return 1; | |
225 | } | |
226 | ||
227 | static flag_pair tre_flags[] = | |
228 | { | |
229 | { "BASIC", REG_BASIC }, | |
230 | { "NOSPEC", REG_NOSPEC }, | |
231 | { "EXTENDED", REG_EXTENDED }, | |
232 | { "ICASE", REG_ICASE }, | |
233 | { "NOSUB", REG_NOSUB }, | |
234 | { "NEWLINE", REG_NEWLINE }, | |
235 | { "NOTBOL", REG_NOTBOL }, | |
236 | { "NOTEOL", REG_NOTEOL }, | |
237 | /* TRE-specific flags */ | |
238 | { "LITERAL", REG_LITERAL }, | |
239 | { "RIGHT_ASSOC", REG_RIGHT_ASSOC }, | |
240 | { "UNGREEDY", REG_UNGREEDY }, | |
241 | { "APPROX_MATCHER", REG_APPROX_MATCHER }, | |
242 | { "BACKTRACKING_MATCHER", REG_BACKTRACKING_MATCHER }, | |
243 | /*---------------------------------------------------------------------------*/ | |
244 | { NULL, 0 } | |
245 | }; | |
246 | ||
247 | static flag_pair tre_error_flags[] = { | |
248 | { "OK", REG_OK }, /* TRE-specific */ | |
249 | { "NOMATCH", REG_NOMATCH }, | |
250 | { "BADPAT", REG_BADPAT }, | |
251 | { "ECOLLATE", REG_ECOLLATE }, | |
252 | { "ECTYPE", REG_ECTYPE }, | |
253 | { "EESCAPE", REG_EESCAPE }, | |
254 | { "ESUBREG", REG_ESUBREG }, | |
255 | { "EBRACK", REG_EBRACK }, | |
256 | { "EPAREN", REG_EPAREN }, | |
257 | { "EBRACE", REG_EBRACE }, | |
258 | { "BADBR", REG_BADBR }, | |
259 | { "ERANGE", REG_ERANGE }, | |
260 | { "ESPACE", REG_ESPACE }, | |
261 | { "BADRPT", REG_BADRPT }, | |
262 | /*---------------------------------------------------------------------------*/ | |
263 | { NULL, 0 } | |
264 | }; | |
265 | ||
266 | /* config. flags with integer value */ | |
267 | static flag_pair tre_config_flags_int[] = { | |
268 | { "CONFIG_APPROX", TRE_CONFIG_APPROX }, | |
269 | { "CONFIG_WCHAR", TRE_CONFIG_WCHAR }, | |
270 | { "CONFIG_MULTIBYTE", TRE_CONFIG_MULTIBYTE }, | |
271 | { "CONFIG_SYSTEM_ABI", TRE_CONFIG_SYSTEM_ABI }, | |
272 | { NULL, 0 } | |
273 | }; | |
274 | ||
275 | /* config. flags with string value */ | |
276 | static flag_pair tre_config_flags_str[] = { | |
277 | { "CONFIG_VERSION", TRE_CONFIG_VERSION }, | |
278 | { NULL, 0 } | |
279 | }; | |
280 | ||
281 | static int Ltre_get_flags (lua_State *L) { | |
282 | const flag_pair* fps[] = { tre_flags, tre_error_flags, NULL }; | |
283 | return get_flags (L, fps); | |
284 | } | |
285 | ||
286 | static int Ltre_config (lua_State *L) { | |
287 | int intval; | |
288 | const char *strval; | |
289 | flag_pair *fp; | |
290 | if (lua_istable (L, 1)) | |
291 | lua_settop (L, 1); | |
292 | else | |
293 | lua_newtable (L); | |
294 | for (fp = tre_config_flags_int; fp->key; ++fp) { | |
295 | if (0 == tre_config (fp->val, &intval)) { | |
296 | lua_pushinteger (L, intval); | |
297 | lua_setfield (L, -2, fp->key); | |
298 | } | |
299 | } | |
300 | for (fp = tre_config_flags_str; fp->key; ++fp) { | |
301 | if (0 == tre_config (fp->val, &strval)) { | |
302 | lua_pushstring (L, strval); | |
303 | lua_setfield (L, -2, fp->key); | |
304 | } | |
305 | } | |
306 | return 1; | |
307 | } | |
308 | ||
309 | static int Ltre_version (lua_State *L) { | |
310 | lua_pushstring (L, tre_version ()); | |
311 | return 1; | |
312 | } | |
313 | ||
314 | static const luaL_Reg r_methods[] = { | |
315 | { "exec", algm_exec }, | |
316 | { "find", algm_find }, | |
317 | { "match", algm_match }, | |
318 | { "tfind", algm_tfind }, | |
319 | { "aexec", Ltre_aexec }, | |
320 | { "atfind", Ltre_atfind }, | |
321 | { "have_approx", Ltre_have_approx }, | |
322 | { "have_backrefs", Ltre_have_backrefs }, | |
323 | { "__gc", Ltre_gc }, | |
324 | { "__tostring", Ltre_tostring }, | |
325 | { NULL, NULL} | |
326 | }; | |
327 | ||
328 | static const luaL_Reg r_functions[] = { | |
329 | { "new", algf_new }, | |
330 | { "find", algf_find }, | |
331 | { "gmatch", algf_gmatch }, | |
332 | { "gsub", algf_gsub }, | |
333 | { "count", algf_count }, | |
334 | { "match", algf_match }, | |
335 | { "split", algf_split }, | |
336 | { "config", Ltre_config }, | |
337 | { "flags", Ltre_get_flags }, | |
338 | { "version", Ltre_version }, | |
339 | { NULL, NULL } | |
340 | }; | |
341 | ||
342 | /* Open the library */ | |
343 | REX_API int REX_OPENLIB (lua_State *L) | |
344 | { | |
345 | alg_register(L, r_methods, r_functions, "TRE regexes"); | |
346 | #ifdef REX_ADDWIDECHARFUNCS | |
347 | add_wide_lib (L); | |
348 | #endif | |
349 | return 1; | |
350 | } |
0 | /* ltre.c - Lua binding of TRE regular expressions library */ | |
1 | /* See Copyright Notice in the file LICENSE */ | |
2 | ||
3 | #include <stdlib.h> | |
4 | #include <string.h> | |
5 | #include <ctype.h> | |
6 | #include "lua.h" | |
7 | #include "lauxlib.h" | |
8 | #include "../common.h" | |
9 | ||
10 | #include <tre/tre.h> | |
11 | ||
12 | void bufferZ_putrepstringW (TBuffer *BufRep, int reppos, int nsub); | |
13 | ||
/* These 2 settings may be redefined from the command-line or the makefile.
 * They should be kept in sync between themselves and with the target name.
 */
#ifndef REX_LIBNAME
# define REX_LIBNAME "rex_tre"
#endif
#ifndef REX_OPENLIB
# define REX_OPENLIB luaopen_rex_tre
#endif

/* Type name formed by string-literal concatenation: REX_LIBNAME + "_regex". */
#define REX_TYPENAME REX_LIBNAME"_regex"

/* Defaults and hooks; presumably consumed by the shared code in ../algo.h
 * (included below) -- confirm there. */
#define ALG_CFLAGS_DFLT REG_EXTENDED
#define ALG_EFLAGS_DFLT 0
/* Factor between regmatch_t offsets and the offsets used on the Lua string
 * (see ALG_SUBBEG / ALG_PUSHSTART).
 * NOTE(review): hard-coded to 2, while bufferZ_putrepstringW() in this file
 * scales by sizeof(wchar_t); confirm both agree on every target platform. */
#define ALG_CHARSIZE 2
#define BUFFERZ_PUTREPSTRING bufferZ_putrepstringW

/* Classification of a TRE return code. */
#define ALG_NOMATCH(res) ((res) == REG_NOMATCH)
#define ALG_ISMATCH(res) ((res) == 0)
/* Start / end / length of sub-match n, scaled by ALG_CHARSIZE. */
#define ALG_SUBBEG(ud,n) (ALG_CHARSIZE * ud->match[n].rm_so)
#define ALG_SUBEND(ud,n) (ALG_CHARSIZE * ud->match[n].rm_eo)
#define ALG_SUBLEN(ud,n) (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n))
/* A sub-match is treated as valid when its start offset is non-negative. */
#define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0)
/* Number of sub-expressions recorded in the compiled regex. */
#define ALG_NSUB(ud) ((int)ud->r.re_nsub)

/* Push sub-match n of `text` as a Lua string. */
#define ALG_PUSHSUB(L,ud,text,n) \
  lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n))

/* Push sub-match n, or boolean false when the sub-match is not valid. */
#define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \
  (ALG_SUBVALID(ud,n) ? (void) ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))

/* Push the start (+1, i.e. 1-based) and end positions of sub-match n,
 * divided back by ALG_CHARSIZE after adding the base offset `offs`. */
#define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, ((offs) + ALG_SUBBEG(ud,n))/ALG_CHARSIZE + 1)
#define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, ((offs) + ALG_SUBEND(ud,n))/ALG_CHARSIZE)
#define ALG_PUSHOFFSETS(L,ud,offs,n) \
  (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n))

/* ALG_BASE passes its argument through unchanged. */
#define ALG_BASE(st) (st)
/* Compilation flags: optional integer argument at `pos`, default ALG_CFLAGS_DFLT. */
#define ALG_GETCFLAGS(L,pos) (int)luaL_optinteger(L, pos, ALG_CFLAGS_DFLT)
52 | ||
/* Payload of the regex userdata created by compile_regex(). */
typedef struct {
  regex_t r;           /* compiled pattern, filled in by tre_regwncomp() */
  regmatch_t * match;  /* ALG_NSUB(ud) + 1 entries, allocated in compile_regex() */
  int freed;           /* zeroed at creation; presumably set once the object
                          has been finalized -- confirm in ../algo.h */
} TPosix;

/* Name under which the shared code refers to the userdata type. */
#define TUserdata TPosix
60 | ||
61 | #include "../algo.h" | |
62 | ||
63 | /* Functions | |
64 | ****************************************************************************** | |
65 | */ | |
66 | ||
67 | static void checkarg_regaparams (lua_State *L, int stackpos, regaparams_t *argP) { | |
68 | if (lua_type (L, stackpos) != LUA_TTABLE) /* allow for userdata? */ | |
69 | luaL_argerror (L, stackpos, "table expected"); | |
70 | lua_pushvalue (L, stackpos); | |
71 | argP->cost_ins = get_int_field (L, "cost_ins"); | |
72 | argP->cost_del = get_int_field (L, "cost_del"); | |
73 | argP->cost_subst = get_int_field (L, "cost_subst"); | |
74 | argP->max_cost = get_int_field (L, "max_cost"); | |
75 | argP->max_ins = get_int_field (L, "max_ins"); | |
76 | argP->max_del = get_int_field (L, "max_del"); | |
77 | argP->max_subst = get_int_field (L, "max_subst"); | |
78 | argP->max_err = get_int_field (L, "max_err"); | |
79 | lua_pop (L, 1); | |
80 | } | |
81 | ||
82 | /* method r:atfind (s, params, [st], [ef]) */ | |
83 | /* method r:aexec (s, params, [st], [ef]) */ | |
84 | static void checkarg_atfind (lua_State *L, TArgExec *argE, TPosix **ud, | |
85 | regaparams_t *argP) { | |
86 | *ud = check_ud (L); | |
87 | argE->text = luaL_checklstring (L, 2, &argE->textlen); | |
88 | checkarg_regaparams (L, 3, argP); | |
89 | argE->startoffset = get_startoffset (L, 4, argE->textlen); | |
90 | argE->eflags = (int)luaL_optinteger (L, 5, ALG_EFLAGS_DFLT); | |
91 | } | |
92 | ||
93 | static int generate_error (lua_State *L, const TPosix *ud, int errcode) { | |
94 | char errbuf[80]; | |
95 | tre_regerror (errcode, &ud->r, errbuf, sizeof (errbuf)); | |
96 | return luaL_error (L, "%s", errbuf); | |
97 | } | |
98 | ||
99 | static int compile_regex (lua_State *L, const TArgComp *argC, TPosix **pud) { | |
100 | int res; | |
101 | TPosix *ud; | |
102 | ||
103 | ud = (TPosix *)lua_newuserdata (L, sizeof (TPosix)); | |
104 | memset (ud, 0, sizeof (TPosix)); /* initialize all members to 0 */ | |
105 | ||
106 | res = tre_regwncomp (&ud->r, (const wchar_t*)argC->pattern, argC->patlen/ALG_CHARSIZE, argC->cflags); | |
107 | if (res != 0) | |
108 | return generate_error (L, ud, res); | |
109 | ||
110 | if (argC->cflags & REG_NOSUB) | |
111 | ud->r.re_nsub = 0; | |
112 | ud->match = (regmatch_t *) Lmalloc (L, (ALG_NSUB(ud) + 1) * sizeof (regmatch_t)); | |
113 | if (!ud->match) | |
114 | luaL_error (L, "malloc failed"); | |
115 | lua_pushvalue (L, ALG_ENVIRONINDEX); | |
116 | lua_setmetatable (L, -2); | |
117 | ||
118 | if (pud) *pud = ud; | |
119 | return 1; | |
120 | } | |
121 | ||
122 | static int generic_atfind (lua_State *L, int tfind) { | |
123 | int res; | |
124 | TArgExec argE; | |
125 | TPosix *ud; | |
126 | regaparams_t argP; | |
127 | regamatch_t res_match; | |
128 | ||
129 | checkarg_atfind (L, &argE, &ud, &argP); | |
130 | if (argE.startoffset > (int)argE.textlen) | |
131 | return lua_pushnil(L), 1; | |
132 | ||
133 | argE.text += argE.startoffset; | |
134 | res_match.nmatch = ALG_NSUB(ud) + 1; | |
135 | res_match.pmatch = ud->match; | |
136 | ||
137 | /* execute the search */ | |
138 | res = tre_regawnexec (&ud->r, (const wchar_t*)argE.text, | |
139 | (argE.textlen - argE.startoffset)/ALG_CHARSIZE, &res_match, argP, argE.eflags); | |
140 | if (ALG_ISMATCH (res)) { | |
141 | ALG_PUSHOFFSETS (L, ud, argE.startoffset, 0); | |
142 | if (tfind) | |
143 | push_substring_table (L, ud, argE.text); | |
144 | else | |
145 | push_offset_table (L, ud, argE.startoffset); | |
146 | /* set values in the dictionary part of the table */ | |
147 | set_int_field (L, "cost", res_match.cost); | |
148 | set_int_field (L, "num_ins", res_match.num_ins); | |
149 | set_int_field (L, "num_del", res_match.num_del); | |
150 | set_int_field (L, "num_subst", res_match.num_subst); | |
151 | return 3; | |
152 | } | |
153 | else if (ALG_NOMATCH (res)) | |
154 | return lua_pushnil (L), 1; | |
155 | else | |
156 | return generate_error (L, ud, res); | |
157 | } | |
158 | ||
159 | static int Ltre_atfind (lua_State *L) { | |
160 | return generic_atfind (L, 1); | |
161 | } | |
162 | ||
163 | static int Ltre_aexec (lua_State *L) { | |
164 | return generic_atfind (L, 0); | |
165 | } | |
166 | ||
167 | static int gmatch_exec (TUserdata *ud, TArgExec *argE) { | |
168 | if (argE->startoffset > 0) | |
169 | argE->eflags |= REG_NOTBOL; | |
170 | argE->text += argE->startoffset; | |
171 | return tre_regwnexec (&ud->r, (const wchar_t*)argE->text, (argE->textlen - argE->startoffset)/ALG_CHARSIZE, | |
172 | ALG_NSUB(ud) + 1, ud->match, argE->eflags); | |
173 | } | |
174 | ||
175 | static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { | |
176 | lua_pushlstring (L, argE->text, argE->textlen); | |
177 | } | |
178 | ||
179 | static int findmatch_exec (TPosix *ud, TArgExec *argE) { | |
180 | argE->text += argE->startoffset; | |
181 | return tre_regwnexec (&ud->r, (const wchar_t*)argE->text, (argE->textlen - argE->startoffset)/ALG_CHARSIZE, | |
182 | ALG_NSUB(ud) + 1, ud->match, argE->eflags); | |
183 | } | |
184 | ||
185 | static int gsub_exec (TPosix *ud, TArgExec *argE, int st) { | |
186 | if (st > 0) | |
187 | argE->eflags |= REG_NOTBOL; | |
188 | return tre_regwnexec (&ud->r, (const wchar_t*)(argE->text+st), (argE->textlen-st)/ALG_CHARSIZE, ALG_NSUB(ud)+1, | |
189 | ud->match, argE->eflags); | |
190 | } | |
191 | ||
192 | static int split_exec (TPosix *ud, TArgExec *argE, int offset) { | |
193 | if (offset > 0) | |
194 | argE->eflags |= REG_NOTBOL; | |
195 | return tre_regwnexec (&ud->r, (const wchar_t*)(argE->text + offset), (argE->textlen - offset)/ALG_CHARSIZE, | |
196 | ALG_NSUB(ud) + 1, ud->match, argE->eflags); | |
197 | } | |
198 | ||
199 | static const luaL_Reg r_methods[] = { | |
200 | { "wexec", algm_exec }, | |
201 | { "wfind", algm_find }, | |
202 | { "wmatch", algm_match }, | |
203 | { "wtfind", algm_tfind }, | |
204 | { "waexec", Ltre_aexec }, | |
205 | { "watfind", Ltre_atfind }, | |
206 | { NULL, NULL} | |
207 | }; | |
208 | ||
209 | static const luaL_Reg r_functions[] = { | |
210 | { "wnew", algf_new }, | |
211 | { "wfind", algf_find }, | |
212 | { "wgmatch", algf_gmatch }, | |
213 | { "wgsub", algf_gsub }, | |
214 | { "wcount", algf_count }, | |
215 | { "wmatch", algf_match }, | |
216 | { "wsplit", algf_split }, | |
217 | { NULL, NULL } | |
218 | }; | |
219 | ||
/* Add the library */
/*
 * Registers the wide-character entries of this file (r_methods and
 * r_functions above) into two tables already on the Lua stack: r_methods
 * goes into the value at index -2 and r_functions into the value at index -1.
 * NOTE(review): presumably these are the regex metatable and the library
 * table left there by alg_register() -- confirm in ../algo.h.
 * The stack height is the same on exit as on entry.
 */
void add_wide_lib (lua_State *L)
{
  (void)alg_register; /* referenced but not called here; presumably only to
                         avoid an unused-function warning -- confirm */
  lua_pushvalue(L, -2); /* copy of the first target table goes on top */
#if LUA_VERSION_NUM == 501
  luaL_register(L, NULL, r_methods); /* NULL name: fill the table on top */
  lua_pop(L, 1); /* drop the copy; the second target is on top again */
  luaL_register(L, NULL, r_functions);
#else
  lua_pushvalue(L, -1); /* one more copy, used as the single upvalue */
  luaL_setfuncs(L, r_methods, 1); /* fills the table below the upvalue, pops the upvalue */
  luaL_setfuncs(L, r_functions, 1); /* remaining copy is the upvalue; fills the table below it */
#endif
}
235 | ||
236 | /* 1. When called repeatedly on the same TBuffer, its existing data | |
237 | is discarded and overwritten by the new data. | |
238 | 2. The TBuffer's array is never shrunk by this function. | |
239 | */ | |
240 | void bufferZ_putrepstringW (TBuffer *BufRep, int reppos, int nsub) { | |
241 | wchar_t dbuf[] = { 0, 0 }; | |
242 | size_t replen; | |
243 | const wchar_t *p = (const wchar_t*) lua_tolstring (BufRep->L, reppos, &replen); | |
244 | replen /= sizeof(wchar_t); | |
245 | const wchar_t *end = p + replen; | |
246 | BufRep->top = 0; | |
247 | while (p < end) { | |
248 | const wchar_t *q; | |
249 | for (q = p; q < end && *q != L'%'; ++q) | |
250 | {} | |
251 | if (q != p) | |
252 | bufferZ_addlstring (BufRep, p, (q - p) * sizeof(wchar_t)); | |
253 | if (q < end) { | |
254 | if (++q < end) { /* skip % */ | |
255 | if (iswdigit (*q)) { | |
256 | int num; | |
257 | *dbuf = *q; | |
258 | num = wcstol (dbuf, NULL, 10); | |
259 | if (num == 1 && nsub == 0) | |
260 | num = 0; | |
261 | else if (num > nsub) { | |
262 | freelist_free (BufRep->freelist); | |
263 | luaL_error (BufRep->L, "invalid capture index"); | |
264 | } | |
265 | bufferZ_addnum (BufRep, num); | |
266 | } | |
267 | else bufferZ_addlstring (BufRep, q, 1 * sizeof(wchar_t)); | |
268 | } | |
269 | p = q + 1; | |
270 | } | |
271 | else break; | |
272 | } | |
273 | } |
0 | To test Lrexlib, execute the following command line: | |
1 | ||
2 | lua ./runtest.lua [-a] [-v] LIBRARY... | |
3 | ||
4 | -a use the external "Alien" library for "buffer subject" tests, | |
5 | rather than the internal function | |
6 | -v gives verbose output |
0 | -- See Copyright Notice in the file LICENSE | |
1 | ||
2 | -- This file should contain only test sets that behave identically | |
3 | -- when being run with pcre or posix regex libraries. | |
4 | ||
5 | local luatest = require "luatest" | |
6 | local N = luatest.NT | |
7 | local unpack = unpack or table.unpack | |
8 | ||
-- Replaces nil by the placeholder N; every other value is returned unchanged.
local function norm (value)
  if value == nil then
    return N
  end
  return value
end
10 | ||
-- Returns lib.gsub when the library provides one; otherwise a wrapper that
-- compiles the pattern with lib.new and calls the gsub method of the result.
local function get_gsub (lib)
  local native = lib.gsub
  if native then
    return native
  end
  return function (subj, pattern, repl, n)
    local compiled = lib.new (pattern)
    return compiled:gsub (subj, repl, n)
  end
end
17 | ||
-- Test set for the library function gmatch (s, p, [cf], [ef]).
local function set_f_gmatch (lib, flg)
  -- Runs lib.gmatch and returns every iteration's two values (nil replaced
  -- via norm) as a separate table; stops after 10 iterations at most.
  local function test_gmatch (subj, patt)
    local collected = {}
    local remaining = 10
    for first, second in lib.gmatch (subj, patt) do
      collected[#collected + 1] = { norm (first), norm (second) }
      remaining = remaining - 1
      if remaining == 0 then break end
    end
    return unpack (collected)
  end

  local set = {
    Name = "Function gmatch",
    Func = test_gmatch,
    -- { subj, patt },                              { results }
    { {"ab", lib.new (".")},                        {{"a",N}, {"b",N}} },
    { {string.rep ("abcd", 3), "(.)b.(d)"},         {{"a","d"}, {"a","d"}, {"a","d"}} },
    { {"abcd", ".*"},                               {{"abcd",N}} }, -- zero-length match
    { {"abc", "^."},                                {{"a",N}} },    -- anchored pattern
  }
  return set
end
39 | ||
-- Test set for the library function count: each row pairs the call
-- arguments {subj, patt} with the expected results.
local function set_f_count (lib, flg)
  local set = {
    Name = "Function count",
    Func = lib.count,
    -- { subj, patt },                        { results }
    { {"ab", lib.new (".")},                  { 2 } },
    { {string.rep ("abcd", 3), "(.)b.(d)"},   { 3 } },
    { {"abcd", ".*"},                         { 1 } },
    { {"abc", "^."},                          { 1 } },
  }
  return set
end
51 | ||
-- Test set for the library function split (s, p, [cf], [ef]).
local function set_f_split (lib, flg)
  -- Runs lib.split and returns every iteration's three values (nil replaced
  -- via norm) as a separate table; stops after 10 iterations at most.
  local function test_split (subj, patt)
    local collected = {}
    local remaining = 10
    for piece, sep1, sep2 in lib.split (subj, patt) do
      collected[#collected + 1] = { norm (piece), norm (sep1), norm (sep2) }
      remaining = remaining - 1
      if remaining == 0 then break end
    end
    return unpack (collected)
  end

  local set = {
    Name = "Function split",
    Func = test_split,
    -- { subj, patt },          { results }
    { {"ab", lib.new (",")},    {{"ab",N,N}, } },
    { {"ab", ","},              {{"ab",N,N}, } },
    { {",", ","},               {{"",",",N}, {"", N, N}, } },
    { {",,", ","},              {{"",",",N}, {"",",",N}, {"",N,N} } },
    { {"a,b", ","},             {{"a",",",N}, {"b",N,N}, } },
    { {",a,b", ","},            {{"",",",N}, {"a",",",N}, {"b",N,N}} },
    { {"a,b,", ","},            {{"a",",",N}, {"b",",",N}, {"",N,N} } },
    { {"a,,b", ","},            {{"a",",",N}, {"",",",N}, {"b",N,N}} },
    { {"ab<78>c", "<(.)(.)>"},  {{"ab","7","8"}, {"c",N,N}, } },
    { {"abc", "^."},            {{"", "a",N}, {"bc",N,N}, } }, -- anchored pattern
    { {"abc", "^"},             {{"", "", N}, {"abc",N,N}, } },
    -- disabled:
    -- { {"abc", "$"},          {{"abc","",N}, {"",N,N}, } },
    -- { {"abc", "^|$"},        {{"", "", N}, {"abc","",N},{"",N,N},} },
  }
  return set
end
82 | ||
-- Test set for the library function find: each row pairs the call
-- arguments {subj, patt, st} with the expected results.
local function set_f_find (lib, flg)
  local set = {
    Name = "Function find",
    Func = lib.find,
    -- { subj, patt, st },          { results }
    { {"abcd", lib.new (".+")},     { 1,4 } },          -- [none]
    { {"abcd", ".+"},               { 1,4 } },          -- [none]
    { {"abcd", ".+", 2},            { 2,4 } },          -- positive st
    { {"abcd", ".+", -2},           { 3,4 } },          -- negative st
    { {"abcd", ".*"},               { 1,4 } },          -- [none]
    { {"abc", "bc"},                { 2,3 } },          -- [none]
    { {"abcd", "(.)b.(d)"},         { 1,4,"a","d" } },  -- [captures]
  }
  return set
end
97 | ||
-- Test set for the library function match: each row pairs the call
-- arguments {subj, patt, st} with the expected results.
local function set_f_match (lib, flg)
  local set = {
    Name = "Function match",
    Func = lib.match,
    -- { subj, patt, st },          { results }
    { {"abcd", lib.new (".+")},     {"abcd"} },   -- [none]
    { {"abcd", ".+"},               {"abcd"} },   -- [none]
    { {"abcd", ".+", 2},            {"bcd"} },    -- positive st
    { {"abcd", ".+", -2},           {"cd"} },     -- negative st
    { {"abcd", ".*"},               {"abcd"} },   -- [none]
    { {"abc", "bc"},                {"bc"} },     -- [none]
    { {"abcd", "(.)b.(d)"},         {"a","d"} },  -- [captures]
  }
  return set
end
112 | ||
-- Test set for the regex method "exec": each row holds the constructor
-- arguments {patt}, the method arguments {subj, st} and the expected results.
local function set_m_exec (lib, flg)
  local set = {
    Name = "Method exec",
    Method = "exec",
    -- { patt },        { subj, st },       { results }
    { {".+"},           {"abcd"},           {1,4,{}} },          -- [none]
    { {".+"},           {"abcd",2},         {2,4,{}} },          -- positive st
    { {".+"},           {"abcd",-2},        {3,4,{}} },          -- negative st
    { {".*"},           {"abcd"},           {1,4,{}} },          -- [none]
    { {"bc"},           {"abc"},            {2,3,{}} },          -- [none]
    { {"(.)b.(d)"},     {"abcd"},           {1,4,{1,1,4,4}} },   -- [captures]
    { {"(a+)6+(b+)"},   {"Taa66bbT",2},     {2,7,{2,3,6,7}} },   -- [st+captures]
  }
  return set
end
127 | ||
-- Test set for the regex method "tfind": each row holds the constructor
-- arguments {patt}, the method arguments {subj, st} and the expected results.
local function set_m_tfind (lib, flg)
  local set = {
    Name = "Method tfind",
    Method = "tfind",
    -- { patt },      { subj, st },     { results }
    { {".+"},         {"abcd"},         {1,4,{}} },          -- [none]
    { {".+"},         {"abcd",2},       {2,4,{}} },          -- positive st
    { {".+"},         {"abcd",-2},      {3,4,{}} },          -- negative st
    { {".*"},         {"abcd"},         {1,4,{}} },          -- [none]
    { {"bc"},         {"abc"},          {2,3,{}} },          -- [none]
    { {"(.)b.(d)"},   {"abcd"},         {1,4,{"a","d"}} },   -- [captures]
  }
  return set
end
141 | ||
-- Test set for the regex method "find": each row holds the constructor
-- arguments {patt}, the method arguments {subj, st} and the expected results.
local function set_m_find (lib, flg)
  local set = {
    Name = "Method find",
    Method = "find",
    -- { patt },      { subj, st },     { results }
    { {".+"},         {"abcd"},         {1,4} },          -- [none]
    { {".+"},         {"abcd",2},       {2,4} },          -- positive st
    { {".+"},         {"abcd",-2},      {3,4} },          -- negative st
    { {".*"},         {"abcd"},         {1,4} },          -- [none]
    { {"bc"},         {"abc"},          {2,3} },          -- [none]
    { {"(.)b.(d)"},   {"abcd"},         {1,4,"a","d"} },  -- [captures]
  }
  return set
end
155 | ||
-- Test set for the regex method "match": each row holds the constructor
-- arguments {patt}, the method arguments {subj, st} and the expected results.
local function set_m_match (lib, flg)
  local set = {
    Name = "Method match",
    Method = "match",
    -- { patt },      { subj, st },     { results }
    { {".+"},         {"abcd"},         {"abcd"} },   -- [none]
    { {".+"},         {"abcd",2},       {"bcd" } },   -- positive st
    { {".+"},         {"abcd",-2},      {"cd" } },    -- negative st
    { {".*"},         {"abcd"},         {"abcd"} },   -- [none]
    { {"bc"},         {"abc"},          {"bc" } },    -- [none]
    { {"(.)b.(d)"},   {"abcd"},         {"a","d"} },  -- [captures]
  }
  return set
end
169 | ||
-- gsub test set 1: the repetition limit "n" combined with empty and
-- non-empty replacement strings, plus one anchored pattern.
local function set_f_gsub1 (lib, flg)
  local subject = "abcdef"
  local pattern = "[abef]+"
  local compiled = lib.new (pattern)
  local set = {
    Name = "Function gsub, set1",
    Func = get_gsub (lib),
    -- { s, p, f, n },                  { res1, res2, res3 }
    { {subject, compiled, "", 0},       {subject, 0, 0} }, -- test "n" + empty_replace
    { {subject, pattern, "", 0},        {subject, 0, 0} }, -- test "n" + empty_replace
    { {subject, pattern, "", -1},       {subject, 0, 0} }, -- test "n" + empty_replace
    { {subject, pattern, "", 1},        {"cdef", 1, 1} },
    { {subject, pattern, "", 2},        {"cd", 2, 2} },
    { {subject, pattern, "", 3},        {"cd", 2, 2} },
    { {subject, pattern, "" },          {"cd", 2, 2} },
    { {subject, pattern, "#", 0},       {subject, 0, 0} }, -- test "n" + non-empty_replace
    { {subject, pattern, "#", 1},       {"#cdef", 1, 1} },
    { {subject, pattern, "#", 2},       {"#cd#", 2, 2} },
    { {subject, pattern, "#", 3},       {"#cd#", 2, 2} },
    { {subject, pattern, "#" },         {"#cd#", 2, 2} },
    { {"abc", "^.", "#" },              {"#bc", 1, 1} },   -- anchored pattern
  }
  return set
end
192 | ||
-- gsub test set 2: handling of "%" in the replacement string.
local function set_f_gsub2 (lib, flg)
  local subject = "abc"
  local pattern = "([ac])"
  local set = {
    Name = "Function gsub, set2",
    Func = get_gsub (lib),
    -- { s, p, f, n },                  { res1, res2, res3 }
    { {subject, pattern, "<%1>" },      {"<a>b<c>", 2, 2} },   -- test non-escaped chars in f
    { {subject, pattern, "%<%1%>" },    {"<a>b<c>", 2, 2} },   -- test escaped chars in f
    { {subject, pattern, "" },          {"b", 2, 2} },         -- test empty replace
    { {subject, pattern, "1" },         {"1b1", 2, 2} },       -- test odd and even %'s in f
    { {subject, pattern, "%1" },        {"abc", 2, 2} },
    { {subject, pattern, "%%1" },       {"%1b%1", 2, 2} },
    { {subject, pattern, "%%%1" },      {"%ab%c", 2, 2} },
    { {subject, pattern, "%%%%1" },     {"%%1b%%1", 2, 2} },
    { {subject, pattern, "%%%%%1" },    {"%%ab%%c", 2, 2} },
  }
  return set
end
210 | ||
-- gsub test set 3: valid and invalid capture indices in the replacement.
-- The last row expects an error (a string instead of a results table).
local function set_f_gsub3 (lib, flg)
  local set = {
    Name = "Function gsub, set3",
    Func = get_gsub (lib),
    -- { s, p, f, n },            { res1, res2, res3 }
    { {"abc", "a", "%0" },        {"abc", 1, 1} }, -- test (in)valid capture index
    { {"abc", "a", "%1" },        {"abc", 1, 1} },
    { {"abc", "[ac]", "%1" },     {"abc", 2, 2} },
    { {"abc", "(a)", "%1" },      {"abc", 1, 1} },
    { {"abc", "(a)", "%2" },      "invalid capture index" },
  }
  return set
end
223 | ||
-- gsub test set 4: dot, repetition and bracket expressions.
local function set_f_gsub4 (lib, flg)
  local set = {
    Name = "Function gsub, set4",
    Func = get_gsub (lib),
    -- { s, p, f, n },                            { res1, res2, res3 }
    { {"a2c3", ".", "#" },                        {"####", 4, 4} },      -- test .
    { {"a2c3", ".+", "#" },                       {"#", 1, 1} },         -- test .+
    { {"a2c3", ".*", "#" },                       {"#", 1, 1} },         -- test .*
    { {"/* */ */", "\\/\\*(.*)\\*\\/", "#" },     {"#", 1, 1} },
    { {"a2c3", "[0-9]", "#" },                    {"a#c#", 2, 2} },      -- test %d
    { {"a2c3", "[^0-9]", "#" },                   {"#2#3", 2, 2} },      -- test %D
    { {"a \t\nb", "[ \t\n]", "#" },               {"a###b", 3, 3} },     -- test %s
    { {"a \t\nb", "[^ \t\n]", "#" },              {"# \t\n#", 2, 2} },   -- test %S
  }
  return set
end
239 | ||
-- gsub test set 5: replacement given as a function.
local function set_f_gsub5 (lib, flg)
  local function returns_nothing () end                       -- returns nothing
  local function returns_hash () return "#" end               -- ignores arguments
  local function joins_args (...)                             -- "normal"
    return table.concat ({...}, ",")
  end
  local function returns_table () return {} end               -- invalid return type
  local function returns_7_a () return "7", "a" end           -- 2-nd return is "a"
  local function returns_7_break () return "7", "break" end   -- 2-nd return is "break"

  local subject = "a2c3"
  local set = {
    Name = "Function gsub, set5",
    Func = get_gsub (lib),
    -- { s, p, f, n },                          { res1, res2, res3 }
    { {subject, "a(.)c(.)", returns_nothing },  {subject, 1, 0} },
    { {subject, "a(.)c(.)", returns_hash },     {"#", 1, 1} },
    { {subject, "a(.)c(.)", joins_args },       {"2,3", 1, 1} },
    { {subject, "a.c.", joins_args },           {subject, 1, 1} },
    { {subject, "z*", returns_nothing },        {subject, 5, 0} },
    { {subject, "z*", returns_hash },           {"#a#2#c#3#", 5, 5} },
    { {subject, "z*", joins_args },             {subject, 5, 5} },
    { {subject, subject, returns_table },       "invalid return type" },
    { {"abc",".", returns_7_a },                {"777", 3, 3} },
    { {"abc",".", returns_7_break },            {"777", 3, 3} },
  }
  return set
end
264 | ||
-- gsub test set 6: replacement given as a table.
local function set_f_gsub6 (lib, flg)
  local empty_tab = {}
  local number_tab = { ["2"] = 56 }
  local nested_tab = { ["2"] = {} }
  local subject = "a2c3"
  local set = {
    Name = "Function gsub, set6",
    Func = get_gsub (lib),
    -- { s, p, f, n },                      { res1, res2, res3 }
    { {subject, "a(.)c(.)", empty_tab },    {subject, 1, 0} },
    { {subject, "a(.)c(.)", number_tab },   {"56", 1, 1} },
    { {subject, "a(.)c(.)", nested_tab },   "invalid replacement type" },
    { {subject, "a.c.", empty_tab },        {subject, 1, 0} },
    { {subject, "a.c.", number_tab },       {subject, 1, 0} },
    { {subject, "a.c.", nested_tab },       {subject, 1, 0} },
  }
  return set
end
280 | ||
-- gsub test set 8: the 4-th argument "n" given as a function.
local function set_f_gsub8 (lib, flg)
  local subject = "abcdef"
  local pattern = ".."
  local replacement = "*"
  local set = {
    Name = "Function gsub, set8",
    Func = get_gsub (lib),
    -- { s, p, f, n },  { res1, res2, res3 }
    { {subject, pattern, replacement, function () end },
      {"abcdef", 3, 0} },
    { {subject, pattern, replacement, function () return nil end },
      {"abcdef", 3, 0} },
    { {subject, pattern, replacement, function () return false end },
      {"abcdef", 3, 0} },
    { {subject, pattern, replacement, function () return true end },
      {"***", 3, 3} },
    { {subject, pattern, replacement, function () return {} end },
      {"***", 3, 3} },
    { {subject, pattern, replacement, function () return "#" end },
      {"###", 3, 3} },
    { {subject, pattern, replacement, function () return 57 end },
      {"575757", 3, 3} },
    { {subject, pattern, replacement, function (from) return from end },
      {"135", 3, 3} },
    { {subject, pattern, replacement, function (from, to) return to end },
      {"246", 3, 3} },
    { {subject, pattern, replacement, function (from, to, rep) return rep end },
      {"***", 3, 3} },
    { {subject, pattern, replacement, function (from, to, rep) return rep..to..from end },
      {"*21*43*65", 3, 3} },
    { {subject, pattern, replacement, function () return nil end },
      {"abcdef", 3, 0} },
    { {subject, pattern, replacement, function () return nil, nil end },
      {"abcdef", 3, 0} },
    { {subject, pattern, replacement, function () return nil, false end },
      {"abcdef", 3, 0} },
    { {subject, pattern, replacement, function () return nil, true end },
      {"ab**", 3, 2} },
    { {subject, pattern, replacement, function () return true, true end },
      {"***", 3, 3} },
    { {subject, pattern, replacement, function () return nil, 0 end },
      {"abcdef", 1, 0} },
    { {subject, pattern, replacement, function () return true, 0 end },
      {"*cdef", 1, 1} },
    { {subject, pattern, replacement, function () return nil, 1 end },
      {"ab*ef", 2, 1} },
    { {subject, pattern, replacement, function () return true, 1 end },
      {"**ef", 2, 2} },
  }
  return set
end
311 | ||
-- Module value: a function that loads the library named `libname` and
-- returns the list of test sets defined in this file, built for it.
local function build_sets (libname)
  local lib = require (libname)
  local sets = {
    set_f_gmatch (lib),
    set_f_split (lib),
    set_f_find (lib),
    set_f_match (lib),
    set_m_exec (lib),
    set_m_tfind (lib),
    set_m_find (lib),
    set_m_match (lib),
    set_f_count (lib),
    set_f_gsub1 (lib),
    set_f_gsub2 (lib),
    set_f_gsub3 (lib),
    set_f_gsub4 (lib),
    set_f_gsub5 (lib),
    set_f_gsub6 (lib),
    set_f_gsub8 (lib),
  }
  return sets
end

return build_sets
0 | -- See Copyright Notice in the file LICENSE | |
1 | ||
2 | local luatest = require "luatest" | |
3 | local N = luatest.NT | |
4 | local unpack = unpack or table.unpack | |
5 | ||
-- Replaces nil by the placeholder N; every other value is returned unchanged.
local function norm (value)
  if value == nil then
    return N
  end
  return value
end
7 | ||
-- Test set for gmatch (s, p, [cf], [ef]) called with flg.SYNTAX_EMACS as
-- the compilation flags.
local function set_f_gmatch (lib, flg)
  -- Runs lib.gmatch and returns every iteration's two values (nil replaced
  -- via norm) as a separate table; stops after 10 iterations at most.
  local function test_gmatch (subj, patt)
    local collected = {}
    local remaining = 10
    for first, second in lib.gmatch (subj, patt, flg.SYNTAX_EMACS, nil) do
      collected[#collected + 1] = { norm (first), norm (second) }
      remaining = remaining - 1
      if remaining == 0 then break end
    end
    return unpack (collected)
  end

  local set = {
    Name = "Function gmatch",
    Func = test_gmatch,
    -- { subj, patt },  { results }
    { {string.rep ("abcd", 3), "\\(.\\)b.\\(d\\)"},
      {{"a","d"}, {"a","d"}, {"a","d"}} },
  }
  return set
end
26 | ||
-- Test set for split (s, p, [cf], [ef]) called with flg.SYNTAX_EMACS as
-- the compilation flags.
local function set_f_split (lib, flg)
  -- Runs lib.split and returns every iteration's three values (nil replaced
  -- via norm) as a separate table; stops after 10 iterations at most.
  local function test_split (subj, patt)
    local collected = {}
    local remaining = 10
    for piece, sep1, sep2 in lib.split (subj, patt, flg.SYNTAX_EMACS, nil) do
      collected[#collected + 1] = { norm (piece), norm (sep1), norm (sep2) }
      remaining = remaining - 1
      if remaining == 0 then break end
    end
    return unpack (collected)
  end

  local set = {
    Name = "Function split",
    Func = test_split,
    -- { subj, patt },  { results }
    { {"ab<78>c", "<\\(.\\)\\(.\\)>"},  {{"ab","7","8"}, {"c",N,N}, } },
  }
  return set
end
45 | ||
-- Module value: a function that loads the library named `libname`, fetches
-- its flags table and returns the test sets of this file built for it.
local function build_sets (libname)
  local lib = require (libname)
  local flags = lib.flags ()
  local sets = {
    set_f_gmatch (lib, flags),
    set_f_split (lib, flags),
  }
  return sets
end

return build_sets
0 | -- See Copyright Notice in the file LICENSE | |
1 | ||
2 | local luatest = require "luatest" | |
3 | local N = luatest.NT | |
4 | local unpack = unpack or table.unpack | |
5 | ||
-- Replaces nil by the placeholder N; every other value is returned unchanged.
local function norm (value)
  if value == nil then
    return N
  end
  return value
end
7 | ||
-- Test set for gmatch (s, p, [cf], [ef], [tr]) called with a translation
-- table that maps every byte value to its lower-case character.
local function set_f_gmatch (lib, flg)
  local downcase = {}
  for code = 0, 255 do -- 255 == UCHAR_MAX
    downcase[code] = string.lower (string.char (code))
  end

  -- Runs lib.gmatch and returns every iteration's two values (nil replaced
  -- via norm) as a separate table; stops after 10 iterations at most.
  local function test_gmatch (subj, patt)
    local collected = {}
    local remaining = 10
    for first, second in lib.gmatch (subj, patt, nil, nil, downcase) do
      collected[#collected + 1] = { norm (first), norm (second) }
      remaining = remaining - 1
      if remaining == 0 then break end
    end
    return unpack (collected)
  end

  local set = {
    Name = "Function gmatch",
    Func = test_gmatch,
    -- { subj, patt },  { results }
    { {"abA", "a"},     {{"a",N}, {"A",N} } },
  }
  return set
end
30 | ||
-- Test set for the library function match using the execution flags
-- not_bol, not_eol and backward taken from `flg`.
local function set_f_match (lib, flg)
  local set = {
    Name = "Function match",
    Func = lib.match,
    -- { subj, patt, st, cf, ef },                  { results }
    { {"abcd", ".+", 5},                            { N } },      -- failing st
    { {"abc", "^abc"},                              {"abc" } },   -- anchor
    { {"abc", "^abc", N,N,flg.not_bol},             { N } },      -- anchor + ef
    { {"abc", "abc$", N,N,flg.not_eol},             { N } },      -- anchor + ef
    { {"cabcaab", "ca+b", N,N,flg.backward},        {"caab" } },  -- reverse search
  }
  return set
end
43 | ||
-- Module value: a function that loads the library named `libname`, fetches
-- its flags table and returns the test sets of this file built for it.
local function build_sets (libname)
  local lib = require (libname)
  local flags = lib.flags ()
  local sets = {
    set_f_match (lib, flags),
    set_f_gmatch (lib),
  }
  return sets
end

return build_sets
0 | -- See Copyright Notice in the file LICENSE | |
1 | ||
-- arrays: deep comparison
-- Two values are equal when they are identical, or when both are tables of
-- the same length whose array items are pairwise equal (recursively).
-- `lut` records pairs of tables already under comparison, so that
-- self-referencing structures do not recurse forever.
local function eq (t1, t2, lut)
  if t1 == t2 then
    return true
  end
  local comparable = type (t1) == "table" and type (t2) == "table" and #t1 == #t2
  if not comparable then
    return false
  end

  lut = lut or {}
  local seen_with_t1 = lut[t1] or {}
  lut[t1] = seen_with_t1
  if seen_with_t1[t2] then
    return true -- this pair is already being (or has been) compared
  end
  local seen_with_t2 = lut[t2] or {}
  lut[t2] = seen_with_t2
  seen_with_t1[t2] = true
  seen_with_t2[t1] = true

  for index, item in ipairs (t1) do
    if not eq (t2[index], item, lut) then -- recursion
      return false
    end
  end
  return true
end
20 | ||
21 | -- a "nil GUID", to be used instead of nils in datasets | |
22 | local NT = "b5f74fe5-46f4-483a-8321-e58ba2fa0e17" | |
23 | ||
-- pack vararg in table, replacing nils with "NT" items
-- The result has exactly select ("#", ...) entries.
local function packNT (...)
  local count = select ("#", ...)
  local args = { ... }
  local packed = {}
  for index = 1, count do
    local item = args[index]
    if item == nil then
      item = NT
    end
    packed[index] = item
  end
  return packed
end
33 | ||
-- unpack table into vararg, replacing "NT" items with nils
-- Returns nothing at all for an empty table.
local function unpackNT (t)
  local last = #t
  if last == 0 then
    return
  end

  local function expand (index)
    local item = t[index]
    if item == NT then
      item = nil
    end
    if index < last then
      return item, expand (index + 1)
    end
    return item
  end

  return expand (1)
end
45 | ||
-- print results (deep into arrays)
-- Writes one line per value to standard output. A table is written as its
-- tostring() form followed by its array items, each one indented one step
-- further; a table met a second time is not expanded again. The "NT"
-- placeholder is written as "nil".
local function print_results (val, indent, lut)
  indent = indent or ""
  lut = lut or {} -- look-up table
  local text = tostring (val)

  if type (val) ~= "table" then
    if val == NT then
      text = "nil"
    end
    io.write (indent, text, "\n")
    return
  end

  io.write (indent, text, "\n")
  if lut[val] then
    return
  end
  lut[val] = true
  for _, item in ipairs (val) do
    print_results (item, " " .. indent, lut) -- recursion
  end
end
65 | ||
-- Runs one function test: calls `func` with the arguments stored in test[1]
-- and compares the outcome with test[2]. When `newmembuffer` is given and
-- the first call succeeded, the subject test[1][1] is replaced by
-- newmembuffer (test[1][1]) and the call is repeated; the outcome of the
-- second call is the one that gets compared.
-- returns:
--  1) true, if success; false, if failure
--  2) test results table or error_message
local function test_function (test, func, newmembuffer)
  -- One protected call; yields true + results table, or false + error_message.
  local function attempt ()
    local outcome = packNT (pcall (func, unpackNT (test[1])))
    if not outcome[1] then
      return false, outcome[2]
    end
    table.remove (outcome, 1)
    return true, outcome
  end

  local ok, res = attempt ()
  if ok and newmembuffer then
    test[1][1] = newmembuffer (test[1][1])
    ok, res = attempt ()
    if not ok then
      print "buffer subjects test failed"
    end
  end

  -- allow error messages to differ: two strings always count as a match
  local same_type = type (res) == type (test[2])
  local how = same_type and (type (res) == "string" or eq (res, test[2]))
  return how, res
end
93 | ||
-- Runs one method test: builds an object with constructor (test[1]...),
-- calls its method `name` with the arguments in test[2] and compares the
-- outcome with test[3].
-- returns:
--  1) true, if success; false, if failure
--  2) test results table, or 1 when the constructor failed, or 2 when the
--     method call failed
--  3) error_message in the two failure cases, nil otherwise
local function test_method (test, constructor, name)
  local res1, res2
  local ok, r = pcall (constructor, unpackNT (test[1]))
  if ok then
    local t = packNT (pcall (r[name], r, unpackNT (test[2])))
    if t[1] then
      table.remove (t, 1)
      res1, res2 = t
    else
      res1, res2 = 2, t[2] --> 2, error_message
    end
  else
    res1, res2 = 1, r --> 1, error_message
  end
  return eq (res1, test[3]), res1, res2
end
115 | ||
-- Runs every test of a set and collects the failing ones.
--   set          : array of tests plus either set.Func (a function) or
--                  set.Method (a method name); set.Func is checked first
--   lib          : library table; lib.new is the constructor for method tests
--   newmembuffer : forwarded unchanged to test_function
-- Returns: a list of failed tests. Each entry holds the 1-based test index in
-- field `i`, followed by the value(s) the failed run produced.
-- Raises an error when the set defines neither a valid Func nor Method.
local function test_set (set, lib, newmembuffer)
  local run_one
  if type (set.Func) == "function" then
    local func = set.Func
    run_one = function (test)
      return test_function (test, func, newmembuffer)
    end
  elseif type (set.Method) == "string" then
    run_one = function (test)
      return test_method (test, lib.new, set.Method)
    end
  else
    error ("neither set.Func nor set.Method is valid")
  end

  local failures = {}
  for i, test in ipairs (set) do
    local ok, res1, res2 = run_one (test)
    if not ok then
      failures[#failures + 1] = {i=i, res1, res2}
    end
  end
  return failures
end
143 | ||
144 | return { | |
145 | eq = eq, | |
146 | NT = NT, | |
147 | print_results = print_results, | |
148 | test_function = test_function, | |
149 | test_method = test_method, | |
150 | test_set = test_set, | |
151 | } |
0 | -- See Copyright Notice in the file LICENSE | |
1 | ||
2 | local luatest = require "luatest" | |
3 | local N = luatest.NT | |
4 | local unpack = unpack or table.unpack | |
5 | ||
6 | local function norm(a) return a==nil and N or a end | |
7 | ||
-- Returns the list n, n-1, ..., m (counting down by one); the list is empty
-- when n < m.
local function fill (n, m)
  local seq = {}
  for value = n, m, -1 do
    seq[#seq + 1] = value
  end
  return seq
end
13 | ||
14 | local function set_named_subpatterns (lib, flg) | |
15 | return { | |
16 | Name = "Named Subpatterns", | |
17 | Func = function (subj, methodname, patt, name1, name2) | |
18 | local r = lib.new (patt) | |
19 | local _,_,caps = r[methodname] (r, subj) | |
20 | return norm(caps[name1]), norm(caps[name2]) | |
21 | end, | |
22 | --{} N.B. subject is always first element | |
23 | { {"abcd", "tfind", "(?<dog>.)b.(?<cat>d)", "dog", "cat"}, {"a","d"} }, | |
24 | { {"abcd", "exec", "(?<dog>.)b.(?<cat>d)", "dog", "cat"}, {"a","d"} }, | |
25 | } | |
26 | end | |
27 | ||
-- Builds the test set for lib.find (subj, patt, st, cf, ef, lo).
local function set_f_find (lib, flg)
  -- Concatenates the single-byte characters with codes first..last.
  local function byte_range (first, last)
    local chars = {}
    for code = first, last do
      chars[#chars + 1] = string.char (code)
    end
    return table.concat (chars)
  end

  -- The Russian alphabet in the CP1251 encoding, spelled out as byte codes so
  -- the subject does not depend on the encoding this source file is saved in:
  -- bytes 1..33 are the upper-case letters, bytes 34..66 the lower-case ones.
  -- 0xA8 / 0xB8 are upper/lower "IO", which sits outside the 0xC0..0xFF run.
  local cp1251 =
    byte_range (0xC0, 0xC5) .. "\168" .. byte_range (0xC6, 0xDF) ..
    byte_range (0xE0, 0xE5) .. "\184" .. byte_range (0xE6, 0xFF)
  local loc = "CP1251"
  return {
    Name = "Function find",
    Func = lib.find,
    --{subj, patt, st,cf,ef,lo}, { results }
    { {"abcd", ".+", 5}, { N } }, -- failing st
    { {"abcd", ".*?"}, { 1,0 } }, -- non-greedy
    { {"abc", "aBC", N,flg.IGNORECASE}, { 1,3 } }, -- cf
    { {"abc", "aBC", N,"i" }, { 1,3 } }, -- cf
    { {cp1251, "[[:upper:]]+", N,N,N, loc}, { 1,33} }, -- locale
    { {cp1251, "[[:lower:]]+", N,N,N, loc}, {34,66} }, -- locale
    { {cp1251, "\\w+", N,N,N, loc}, {1, 66} }, -- locale
  }
end
45 | ||
46 | local function set_f_match (lib, flg) | |
47 | return { | |
48 | Name = "Function match", | |
49 | Func = lib.match, | |
50 | --{subj, patt, st,cf,ef,lo}, { results } | |
51 | { {"abcd", ".+", 5}, { N }}, -- failing st | |
52 | { {"abcd", ".*?"}, { "" }}, -- non-greedy | |
53 | { {"abc", "aBC", N,flg.IGNORECASE}, {"abc" }}, -- cf | |
54 | { {"abc", "aBC", N,"i" }, {"abc" }}, -- cf | |
55 | } | |
56 | end | |
57 | ||
-- Builds the test set for lib.gmatch (s, p, [cf], [ef]).
local function set_f_gmatch (lib, flg)
  local pCSV = "[^,]*"

  -- Iterates lib.gmatch over the subject and returns one { a, b } pair per
  -- match (first two values of each iteration, passed through norm).
  -- The guard stops the loop after 10 iterations.
  local function test_gmatch (subj, patt)
    local out, guard = {}, 10
    for a, b in lib.gmatch (subj, patt) do
      table.insert (out, { norm(a), norm(b) })
      guard = guard - 1
      if guard == 0 then break end
    end
    return unpack (out)
  end

  return {
    Name = "Function gmatch",
    Func = test_gmatch,
    --{ subj patt results }
    { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
    { {"", pCSV}, {{"",N}} },
    { {"12", pCSV}, {{"12",N}} },
    { {",", pCSV}, {{"", N},{"", N}} },
    { {"12,,45", pCSV}, {{"12",N},{"",N},{"45",N}} },
    { {",,12,45,,ab,", pCSV}, {{"",N},{"",N},{"12",N},{"45",N},{"",N},{"ab",N},{"",N}} },
    { {"12345", "(.)(.)"}, {{"1","2"},{"3","4"}} },
    { {"12345", "(.)(.?)"}, {{"1","2"},{"3","4"},{"5",""}} },
  }
end
85 | ||
86 | local function set_f_split (lib, flg) | |
87 | -- split (s, p, [cf], [ef]) | |
88 | local function test_split (subj, patt) | |
89 | local out, guard = {}, 10 | |
90 | for a, b, c in lib.split (subj, patt) do | |
91 | table.insert (out, { norm(a), norm(b), norm(c) }) | |
92 | guard = guard - 1 | |
93 | if guard == 0 then break end | |
94 | end | |
95 | return unpack (out) | |
96 | end | |
97 | return { | |
98 | Name = "Function split", | |
99 | Func = test_split, | |
100 | --{ subj patt results } | |
101 | { {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj | |
102 | { {"ab", "$"}, {{"ab","",N}, {"",N,N} } }, | |
103 | { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N} } }, | |
104 | { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N}, {"",N,N} } }, | |
105 | { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N} } }, | |
106 | { {"ab", ".*" }, {{"","ab",N}, {"",N,N} } }, | |
107 | { {"ab", ".*?" }, {{"","",N}, {"a","",N}, {"b","",N}, {"",N,N} } }, | |
108 | { {"ab;de", ";*" }, {{"","",N},{"a","",N},{"b",";",N},{"d","",N},{"e","",N},{"",N,N} }}, | |
109 | } | |
110 | end | |
111 | ||
112 | local function set_f_internal_test (lib, flg) | |
113 | return { | |
114 | Name = "Function internal_test", | |
115 | Func = lib.internal_test, | |
116 | --{ params results } | |
117 | { {""}, {true} }, | |
118 | } | |
119 | end | |
120 | ||
121 | local function set_m_exec (lib, flg) | |
122 | return { | |
123 | Name = "Method exec", | |
124 | Method = "exec", | |
125 | --{patt,cf,lo}, {subj,st,ef} { results } | |
126 | { {".+"}, {"abcd",5}, { N } }, -- failing st | |
127 | { {".*?"}, {"abcd"}, {1,0,{}} }, -- non-greedy | |
128 | { {"aBC",flg.IGNORECASE}, {"abc"}, {1,3,{}} }, -- cf | |
129 | { {"aBC","i" }, {"abc"}, {1,3,{}} }, -- cf | |
130 | } | |
131 | end | |
132 | ||
133 | local function set_m_tfind (lib, flg) | |
134 | return { | |
135 | Name = "Method tfind", | |
136 | Method = "tfind", | |
137 | --{patt,cf,lo}, {subj,st,ef} { results } | |
138 | { {".+"}, {"abcd",5}, { N } }, -- failing st | |
139 | { {".*?"}, {"abcd"}, {1,0,{}} }, -- non-greedy | |
140 | { {"aBC",flg.IGNORECASE}, {"abc"}, {1,3,{}} }, -- cf | |
141 | { {"aBC","i" }, {"abc"}, {1,3,{}} }, -- cf | |
142 | } | |
143 | end | |
144 | ||
145 | local function set_m_capturecount (lib, flg) | |
146 | return { | |
147 | Name = "Method capturecount", | |
148 | Method = "capturecount", | |
149 | --{patt,cf,lo}, {subj,st,ef} { results } | |
150 | { {"a"}, {}, { 0 } }, | |
151 | { {"(a)"}, {}, { 1 } }, | |
152 | { {"(a)(a)"}, {}, { 2 } }, | |
153 | { {"((a)a)"}, {}, { 2 } }, | |
154 | { {"((?i)a)(?:a)"}, {}, { 1 } }, | |
155 | } | |
156 | end | |
157 | ||
-- Module entry point: loads the library `libname` and returns the list of
-- Oniguruma-specific test sets built for it.
return function (libname)
  local lib = require (libname)
  local flags = lib.flags ()

  -- Set builders, in the order their sets appear in the result.
  local builders = {
    set_f_internal_test,
    set_f_match,
    set_f_find,
    set_f_gmatch,
    set_f_split,
    set_m_exec,
    set_m_tfind,
    set_m_capturecount,
  }
  local sets = {}
  for _, build in ipairs (builders) do
    sets[#sets + 1] = build (lib, flags)
  end

  -- Major version = first run of digits in the library version string.
  local MAJOR = tonumber (lib.version ():match ("%d+"))
  if MAJOR >= 0 then
    sets[#sets + 1] = set_named_subpatterns (lib, flags)
  end
  return sets
end
0 | -- See Copyright Notice in the file LICENSE | |
1 | ||
2 | -- Convert Lua regex pattern to its PCRE equivalent. | |
3 | ||
-- Lua character-class letters (the character after "%") mapped to the text
-- that represents them INSIDE a PCRE bracket expression.
local t_esc = {
  a = "[:alpha:]",  A = "[:^alpha:]",
  c = "[:cntrl:]",  C = "[:^cntrl:]",
  l = "[:lower:]",  L = "[:^lower:]",
  p = "[:punct:]",  P = "[:^punct:]",
  u = "[:upper:]",  U = "[:^upper:]",
  w = "[:alnum:]",  W = "[:^alnum:]",
  x = "[:xdigit:]", X = "[:^xdigit:]",
  z = "\\x00",      Z = "\\x01-\\xFF",
}

-- Translates "%<ch>" found outside a bracket expression.
-- Known classes are wrapped in their own brackets; everything else becomes a
-- backslash escape. "%b" and "%0" are rejected with an assertion error.
local function rep_normal (ch)
  assert (ch ~= "b", "\"%b\" subpattern is not supported")
  assert (ch ~= "0", "invalid capture index")
  local class = t_esc[ch]
  if class then
    return "[" .. class .. "]"
  end
  return "\\" .. ch
end

-- Translates "%<ch>" found inside a bracket expression: the class text is
-- inserted as is, anything else becomes a backslash escape.
local function rep_charclass (ch)
  local class = t_esc[ch]
  if class then
    return class
  end
  return "\\" .. ch
end

-- Converts the Lua pattern `s` to PCRE syntax and returns the new string.
-- Outside brackets: "%x" is translated by rep_normal, "-" becomes "*?",
-- "." becomes "\C" and "[" opens a bracket expression.
-- Inside brackets: "%x" is translated by rep_charclass and "]" closes it.
-- In both states a backslash followed by 1-3 decimal digits is rewritten as
-- a "\xHH" escape; every other character is copied unchanged.
function pat2pcre (s)
  local pos = 0           -- index of the character consumed last
  local pieces = {}       -- output fragments, joined at the end
  local in_class = false  -- true while inside [...]

  -- Consumes and returns the next character ("" past the end of s).
  local function next_char ()
    pos = pos + 1
    return string.sub (s, pos, pos)
  end

  -- If a backslash followed by decimal digits starts at `pos`, consumes the
  -- digits and returns the equivalent hexadecimal escape; otherwise nil.
  local function decimal_escape ()
    local digits = string.match (s, "^\\(%d%d?%d?)", pos)
    if not digits then
      return nil
    end
    pos = pos + #digits
    return string.format ("\\x%02X", digits)
  end

  while pos < #s do
    local ch = next_char ()
    local piece
    if ch == "%" then
      local escaped = next_char ()
      if in_class then
        piece = rep_charclass (escaped)
      else
        piece = rep_normal (escaped)
      end
    elseif in_class and ch == "]" then
      piece = ch
      in_class = false
    elseif not in_class and ch == "-" then
      piece = "*?"
    elseif not in_class and ch == "." then
      piece = "\\C"
    elseif not in_class and ch == "[" then
      piece = ch
      in_class = true
    else
      piece = decimal_escape () or ch
    end
    pieces[#pieces + 1] = piece
  end
  return table.concat (pieces)
end
81 | ||
82 | return pat2pcre |
0 | -- See Copyright Notice in the file LICENSE | |
1 | ||
2 | local luatest = require "luatest" | |
3 | local N = luatest.NT | |
4 | local unpack = unpack or table.unpack | |
5 | ||
6 | local function norm(a) return a==nil and N or a end | |
7 | ||
-- Returns the list n, n-1, ..., m (counting down by one); the list is empty
-- when n < m.
local function fill (n, m)
  local seq = {}
  for value = n, m, -1 do
    seq[#seq + 1] = value
  end
  return seq
end
13 | ||
14 | local function set_named_subpatterns (lib, flg) | |
15 | return { | |
16 | Name = "Named Subpatterns", | |
17 | Func = function (subj, methodname, patt, name1, name2) | |
18 | local r = lib.new (patt) | |
19 | local _,_,caps = r[methodname] (r, subj) | |
20 | return norm(caps[name1]), norm(caps[name2]) | |
21 | end, | |
22 | --{} N.B. subject is always first element | |
23 | { {"abcd", "tfind", "(?P<dog>.)b.(?P<cat>d)", "dog", "cat"}, {"a","d"} }, | |
24 | { {"abcd", "exec", "(?P<dog>.)b.(?P<cat>d)", "dog", "cat"}, {"a","d"} }, | |
25 | } | |
26 | end | |
27 | ||
28 | local function set_f_find (lib, flg) | |
29 | local cp1251 = | |
30 | "������¨�������������������������‗אבגדהו¸זחטיךכלםמןנסעףפץצקרשת���" | |
31 | local loc = "Russian_Russia.1251" | |
32 | return { | |
33 | Name = "Function find", | |
34 | Func = lib.find, | |
35 | --{subj, patt, st,cf,ef,lo}, { results } | |
36 | { {"abcd", ".+", 5}, { N } }, -- failing st | |
37 | { {"abcd", ".*?"}, { 1,0 } }, -- non-greedy | |
38 | { {"abc", "aBC", N,flg.CASELESS}, { 1,3 } }, -- cf | |
39 | { {"abc", "aBC", N,"i" }, { 1,3 } }, -- cf | |
40 | { {"abc", "bc", N,flg.ANCHORED}, { N } }, -- cf | |
41 | { {"abc", "bc", N,N,flg.ANCHORED}, { N } }, -- ef | |
42 | --{ {cp1251, "[[:upper:]]+", N,N,N, loc}, { 1,33} }, -- locale | |
43 | --{ {cp1251, "[[:lower:]]+", N,N,N, loc}, {34,66} }, -- locale | |
44 | } | |
45 | end | |
46 | ||
47 | local function set_f_match (lib, flg) | |
48 | return { | |
49 | Name = "Function match", | |
50 | Func = lib.match, | |
51 | --{subj, patt, st,cf,ef,lo}, { results } | |
52 | { {"abcd", ".+", 5}, { N }}, -- failing st | |
53 | { {"abcd", ".*?"}, { "" }}, -- non-greedy | |
54 | { {"abc", "aBC", N,flg.CASELESS}, {"abc" }}, -- cf | |
55 | { {"abc", "aBC", N,"i" }, {"abc" }}, -- cf | |
56 | { {"abc", "bc", N,flg.ANCHORED}, { N }}, -- cf | |
57 | { {"abc", "bc", N,N,flg.ANCHORED}, { N }}, -- ef | |
58 | } | |
59 | end | |
60 | ||
-- Builds the test set for lib.gmatch (s, p, [cf], [ef]).
local function set_f_gmatch (lib, flg)
  local pCSV = "[^,]*"

  -- Iterates lib.gmatch over the subject and returns one { a, b } pair per
  -- match (first two values of each iteration, passed through norm).
  -- The guard stops the loop after 10 iterations.
  local function test_gmatch (subj, patt)
    local out, guard = {}, 10
    for a, b in lib.gmatch (subj, patt) do
      table.insert (out, { norm(a), norm(b) })
      guard = guard - 1
      if guard == 0 then break end
    end
    return unpack (out)
  end

  return {
    Name = "Function gmatch",
    Func = test_gmatch,
    --{ subj patt results }
    { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
    { {"", pCSV}, {{"",N}} },
    { {"12", pCSV}, {{"12",N}} },
    { {",", pCSV}, {{"", N},{"", N}} },
    { {"12,,45", pCSV}, {{"12",N},{"",N},{"45",N}} },
    { {",,12,45,,ab,", pCSV}, {{"",N},{"",N},{"12",N},{"45",N},{"",N},{"ab",N},{"",N}} },
    { {"12345", "(.)(.)"}, {{"1","2"},{"3","4"}} },
    { {"12345", "(.)(.?)"}, {{"1","2"},{"3","4"},{"5",""}} },
  }
end
88 | ||
89 | local function set_f_split (lib, flg) | |
90 | -- split (s, p, [cf], [ef]) | |
91 | local function test_split (subj, patt) | |
92 | local out, guard = {}, 10 | |
93 | for a, b, c in lib.split (subj, patt) do | |
94 | table.insert (out, { norm(a), norm(b), norm(c) }) | |
95 | guard = guard - 1 | |
96 | if guard == 0 then break end | |
97 | end | |
98 | return unpack (out) | |
99 | end | |
100 | return { | |
101 | Name = "Function split", | |
102 | Func = test_split, | |
103 | --{ subj patt results } | |
104 | { {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj | |
105 | { {"ab", "$"}, {{"ab","",N}, {"",N,N} } }, | |
106 | { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N} } }, | |
107 | { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N}, {"",N,N} } }, | |
108 | { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N} } }, | |
109 | { {"ab", ".*" }, {{"","ab",N}, {"",N,N} } }, | |
110 | { {"ab", ".*?" }, {{"","",N}, {"a","",N}, {"b","",N}, {"",N,N} } }, | |
111 | { {"ab;de", ";*" }, {{"","",N},{"a","",N},{"b",";",N},{"d","",N},{"e","",N},{"",N,N} }}, | |
112 | } | |
113 | end | |
114 | ||
115 | local function set_m_exec (lib, flg) | |
116 | return { | |
117 | Name = "Method exec", | |
118 | Method = "exec", | |
119 | --{patt,cf,lo}, {subj,st,ef} { results } | |
120 | { {".+"}, {"abcd",5}, { N } }, -- failing st | |
121 | { {".*?"}, {"abcd"}, {1,0,{}} }, -- non-greedy | |
122 | { {"aBC",flg.CASELESS}, {"abc"}, {1,3,{}} }, -- cf | |
123 | { {"aBC","i" }, {"abc"}, {1,3,{}} }, -- cf | |
124 | { {"bc",flg.ANCHORED}, {"abc"}, { N } }, -- cf | |
125 | { {"bc"}, {"abc",N, flg.ANCHORED}, { N } }, -- ef | |
126 | } | |
127 | end | |
128 | ||
129 | local function set_m_tfind (lib, flg) | |
130 | return { | |
131 | Name = "Method tfind", | |
132 | Method = "tfind", | |
133 | --{patt,cf,lo}, {subj,st,ef} { results } | |
134 | { {".+"}, {"abcd",5}, { N } }, -- failing st | |
135 | { {".*?"}, {"abcd"}, {1,0,{}} }, -- non-greedy | |
136 | { {"aBC",flg.CASELESS}, {"abc"}, {1,3,{}} }, -- cf | |
137 | { {"aBC","i" }, {"abc"}, {1,3,{}} }, -- cf | |
138 | { {"bc",flg.ANCHORED}, {"abc"}, { N } }, -- cf | |
139 | { {"bc"}, {"abc",N, flg.ANCHORED}, { N } }, -- ef | |
140 | } | |
141 | end | |
142 | ||
143 | local function set_m_dfa_exec (lib, flg) | |
144 | local ver = tonumber(lib.version():match("%d+%.%d+")) | |
145 | local NAP = ver < 8.34 and "" or "(*NO_AUTO_POSSESS)" | |
146 | local flag_partial = ver < 10.0 and flg.PARTIAL or flg.PARTIAL_SOFT | |
147 | return { | |
148 | Name = "Method dfa_exec", | |
149 | Method = "dfa_exec", | |
150 | --{patt,cf,lo}, {subj,st,ef,os,ws} { results } | |
151 | { {NAP..".+"}, {"abcd"}, {1,{4,3,2,1},4} }, -- [none] | |
152 | { {NAP..".+"}, {"abcd",2}, {2,{4,3,2}, 3} }, -- positive st | |
153 | { {NAP..".+"}, {"abcd",-2}, {3,{4,3}, 2} }, -- negative st | |
154 | { {".+"}, {"abcd",5}, {N } }, -- failing st | |
155 | { {NAP..".*"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- [none] | |
156 | { {".*?"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- non-greedy | |
157 | { {"aBC",flg.CASELESS}, {"abc"}, {1,{3},1} }, -- cf | |
158 | { {"aBC","i" }, {"abc"}, {1,{3},1} }, -- cf | |
159 | { {"bc"}, {"abc"}, {2,{3},1} }, -- [none] | |
160 | { {"bc",flg.ANCHORED}, {"abc"}, {N } }, -- cf | |
161 | { {"bc"}, {"abc",N, flg.ANCHORED}, {N } }, -- ef | |
162 | { { "(.)b.(d)"}, {"abcd"}, {1,{4},1} }, --[captures] | |
163 | { {"abc"}, {"ab"}, {N } }, | |
164 | { {"abc"}, {"ab",N,flag_partial}, {1,{2},flg.ERROR_PARTIAL} }, | |
165 | { {NAP..".+"}, {string.rep("a",50),N,N,50,50}, {1, fill(50,26), 0}},-- small ovecsize | |
166 | } | |
167 | end | |
168 | ||
169 | local function set_m_fullinfo (lib, flg) | |
170 | local r = lib.new("(foo)(bar)") | |
171 | local info = r:fullinfo() | |
172 | assert(info.CAPTURECOUNT == 2) | |
173 | end | |
174 | ||
-- Module entry point: loads the library `libname` and returns the list of
-- PCRE-specific test sets built for it.
return function (libname)
  local lib = require (libname)
  local flags = lib.flags ()
  local sets = {
    set_f_match (lib, flags),
    set_f_find (lib, flags),
    set_f_gmatch (lib, flags),
    set_f_split (lib, flags),
    set_m_exec (lib, flags),
    set_m_tfind (lib, flags),
  }

  -- set_m_fullinfo is a self-checking smoke test: it asserts internally and
  -- returns no set. It is called outside the table constructor so that it can
  -- never leave a nil hole in `sets` (which would make ipairs stop early and
  -- silently skip every set placed after it).
  set_m_fullinfo (lib, flags)

  if flags.MAJOR >= 4 then
    table.insert (sets, set_named_subpatterns (lib, flags))
  end
  if flags.MAJOR >= 6 then
    table.insert (sets, set_m_dfa_exec (lib, flags))
  end
  return sets
end
0 | -- See Copyright Notice in the file LICENSE | |
1 | ||
2 | local pat2pcre = require "pat2pcre" | |
3 | local unpack = unpack or table.unpack | |
4 | ||
-- Returns lib.gsub when the library provides it. Otherwise returns a stand-in
-- taking the same (subj, pattern, repl, n) arguments, which builds an object
-- with lib.new (pattern) and calls that object's gsub method.
local function get_gsub (lib)
  local native = lib.gsub
  if native then
    return native
  end
  return function (subj, pattern, repl, n)
    local compiled = lib.new (pattern)
    return compiled:gsub (subj, repl, n)
  end
end
11 | ||
-- Builds gsub test set 1: a single case whose subject contains a NUL byte.
local function set_f_gsub1 (lib, flg)
  return {
    Name = "Function gsub, set1",
    Func = get_gsub (lib),
    --{ s, p, f, n, res1, res2, res3 },
    { {"a\0c", ".", "#" }, {"###", 3, 3} }, -- subj contains nuls
  }
end
21 | ||
22 | local function set_f_gsub4 (lib, flg) | |
23 | local pCSV = "(^[^,]*)|,([^,]*)" | |
24 | local fCSV = function (a,b) return "["..(a or b).."]" end | |
25 | local set = { | |
26 | Name = "Function gsub, set4", | |
27 | Func = get_gsub (lib), | |
28 | --{ s, p, f, n, res1, res2, res3 }, | |
29 | { {"/* */ */", "%/%*(.*)%*%/", "#" }, {"#", 1, 1} }, | |
30 | { {"a2c3", ".-", "#" }, {"#a#2#c#3#", 5, 5} }, -- test .- | |
31 | { {"/**/", "%/%*(.-)%*%/", "#" }, {"#", 1, 1} }, | |
32 | { {"/* */ */", "%/%*(.-)%*%/", "#" }, {"# */", 1, 1} }, | |
33 | { {"a2c3", "%d", "#" }, {"a#c#", 2, 2} }, -- test %d | |
34 | { {"a2c3", "%D", "#" }, {"#2#3", 2, 2} }, -- test %D | |
35 | { {"a \t\nb", "%s", "#" }, {"a###b", 3, 3} }, -- test %s | |
36 | { {"a \t\nb", "%S", "#" }, {"# \t\n#", 2, 2} }, -- test %S | |
37 | { {"abcd", "\\b", "%1"}, {"abcd", 2, 2} }, | |
38 | { {"", pCSV,fCSV}, {"[]", 1, 1} }, | |
39 | { {"123", pCSV,fCSV}, {"[123]", 1, 1} }, | |
40 | { {",", pCSV,fCSV}, {"[],", 1, 1} }, | |
41 | { {"123,,456", pCSV,fCSV}, {"[123][][456]", 3, 3}}, | |
42 | { {",,123,456,,abc,789,", pCSV,fCSV}, {"[],[123][456][][abc][789][]", 7, 7}}, | |
43 | } | |
44 | -- convert patterns: lua -> pcre | |
45 | for _, test in ipairs (set) do | |
46 | test[1][2] = pat2pcre (test[1][2]) | |
47 | end | |
48 | return set | |
49 | end | |
50 | ||
-- Builds gsub test set 7: character-class patterns applied to a subject that
-- contains every byte value once. Each test deletes the matches (replacement
-- is ""). The expected results are not written down: they are taken from
-- Lua's own string.gsub run on the Lua form of the pattern, after which the
-- pattern handed to the tested library is converted with pat2pcre.
local function set_f_gsub7 (lib, flg)
  -- Subject: the bytes 0, 1, ..., 255 in ascending order.
  local bytes = {}
  for code = 0, 255 do
    bytes[#bytes + 1] = string.char (code)
  end
  local subj = table.concat (bytes)

  local set = {
    Name = "Function gsub, set7",
    Func = get_gsub (lib),
  }

  -- Appends the test for one Lua pattern:
  --   [1] = { subj, <pattern converted to PCRE>, "" }
  --   [2] = reference results { str, count, count } from string.gsub, or
  --         { false, error_message } when string.gsub itself raised.
  local function add (patt)
    local ok, r1, r2 = pcall (string.gsub, subj, patt, "")
    set[#set + 1] = {
      {subj, pat2pcre (patt), ""},
      ok and { r1, r2, r2 } or { ok, r1 },
    }
  end

  -- Every class letter is tried in each of these pattern shapes; the tests
  -- are ordered shape by shape, classes in the listed order within a shape.
  local classes = {
    "a", "A", "c", "C", "l", "L", "p", "P",
    "u", "U", "w", "W", "x", "X", "z", "Z",
  }
  local shapes = {
    "%%%s",        -- %a
    "[%%%s]",      -- [%a]
    "[%%%s_]",     -- [%a_]
    "[%%%s%%d]",   -- [%a%d]
    "[^%%%s%%d]",  -- [^%a%d]
    "[^%%%s_]",    -- [^%a_]
  }
  for _, shape in ipairs (shapes) do
    for _, class in ipairs (classes) do
      add (string.format (shape, class))
    end
  end

  -- Patterns written with decimal character escapes.
  local escaped = {
    "\100", "[\100]", "[^\100]", "[\100-\200]", "[^\100-\200]",
    "\100a", "[\100a]", "[^\100a]", "[\100-\200a]", "[^\100-\200a]",
  }
  for _, patt in ipairs (escaped) do
    add (patt)
  end

  return set
end
186 | ||
-- Module entry point: loads the library `libname` and returns the list of
-- gsub test sets built for it.
return function (libname)
  local lib = require (libname)
  -- `flags` stays nil for a library that does not export lib.flags.
  local flags = lib.flags and lib.flags ()
  local sets = {
    set_f_gsub1 (lib, flags),
    set_f_gsub4 (lib, flags),
  }
  -- Set 7 is added only for library versions above 4.05. The version can only
  -- be checked when flags are available, so the nil case is guarded here
  -- instead of failing on an attempt to index nil.
  if flags and flags.MAJOR*100 + flags.MINOR > 405 then
    table.insert (sets, set_f_gsub7 (lib, flags))
  end
  return sets
end
199 |
0 | -- See Copyright Notice in the file LICENSE | |
1 | ||
2 | local luatest = require "luatest" | |
3 | local N = luatest.NT | |
4 | ||
5 | local function set_f_find (lib, flg) | |
6 | return { | |
7 | Name = "Function find", | |
8 | Func = lib.find, | |
9 | --{subj, patt, st,cf,ef}, { results } | |
10 | { {"abcd", ".+", 5}, { N } }, -- failing st | |
11 | { {"abc", "aBC", N, flg.ICASE}, { 1,3 } }, -- cf | |
12 | { {"abc", "^abc"}, { 1,3 } }, -- anchor | |
13 | { {"abc", "^abc", N,N,flg.NOTBOL}, { N } }, -- anchor + ef | |
14 | } | |
15 | end | |
16 | ||
17 | local function set_f_match (lib, flg) | |
18 | return { | |
19 | Name = "Function match", | |
20 | Func = lib.match, | |
21 | --{subj, patt, st,cf,ef}, { results } | |
22 | { {"abcd", ".+", 5}, { N } }, -- failing st | |
23 | { {"abc", "aBC", N, flg.ICASE}, {"abc" } }, -- cf | |
24 | { {"abc", "^abc"}, {"abc" } }, -- anchor | |
25 | { {"abc", "^abc", N,N,flg.NOTBOL}, { N } }, -- anchor + ef | |
26 | } | |
27 | end | |
28 | ||
29 | local function set_m_exec (lib, flg) | |
30 | return { | |
31 | Name = "Method exec", | |
32 | Method = "exec", | |
33 | -- {patt,cf}, {subj,st,ef} { results } | |
34 | { {".+"}, {"abcd",5}, { N } }, -- failing st | |
35 | { {"aBC",flg.ICASE}, {"abc"}, {1,3,{}} }, -- cf | |
36 | { {"^abc"}, {"abc"}, {1,3,{}} }, -- anchor | |
37 | { {"^abc"}, {"abc",N,flg.NOTBOL}, { N } }, -- anchor + ef | |
38 | } | |
39 | end | |
40 | ||
41 | local function set_m_tfind (lib, flg) | |
42 | return { | |
43 | Name = "Method tfind", | |
44 | Method = "tfind", | |
45 | -- {patt,cf}, {subj,st,ef} { results } | |
46 | { {".+"}, {"abcd",5}, { N } }, -- failing st | |
47 | { {"aBC",flg.ICASE}, {"abc"}, {1,3,{}} }, -- cf | |
48 | { {"^abc"}, {"abc"}, {1,3,{}} }, -- anchor | |
49 | { {"^abc"}, {"abc",N,flg.NOTBOL}, { N } }, -- anchor + ef | |
50 | } | |
51 | end | |
52 | ||
53 | return function (libname) | |
54 | local lib = require (libname) | |
55 | local flags = lib.flags () | |
56 | return { | |
57 | set_f_match (lib, flags), | |
58 | set_f_find (lib, flags), | |
59 | set_m_exec (lib, flags), | |
60 | set_m_tfind (lib, flags), | |
61 | } | |
62 | end |
0 | -- See Copyright Notice in the file LICENSE | |
1 | ||
2 | do | |
3 | local path = "./?.lua;" | |
4 | if package.path:sub(1, #path) ~= path then | |
5 | package.path = path .. package.path | |
6 | end | |
7 | end | |
8 | local luatest = require "luatest" | |
9 | ||
-- Creates an "alien" buffer of #str bytes and, unless str is empty, copies
-- the bytes of str into it with alien.memmove. Returns the buffer.
-- The "alien" module is required on each call, i.e. only when actually used.
local function newalienbuffer (str)
  local alien = require "alien"
  local size = #str
  local buf = alien.buffer (size)
  if size > 0 then
    local target = buf:topointer ()
    alien.memmove (target, str, size)
  end
  return buf
end
18 | ||
-- Runs all test sets of one set file against one library.
--   libname   : module name of the library under test
--   setfile   : module name of the set file; the module must return a
--               function that maps libname to a list of test sets
--   verbose   : when true, prints a header, each set name and every failure
--   use_alien : when true, buffer subjects are created with newalienbuffer
--               instead of the library's own lib._newmembuffer
-- returns: number of failures
local function test_library (libname, setfile, verbose, use_alien)
  if verbose then
    print (string.format ("[lib: %s; file: %s]", libname, setfile))
  end
  local lib = require (libname)
  local make_sets = require (setfile)
  local sets = make_sets (libname)

  -- Pick the buffer factory for the "buffer subjects" re-run of each test.
  local newmembuffer
  if use_alien then
    newmembuffer = newalienbuffer
  else
    newmembuffer = lib._newmembuffer
  end
  if not newmembuffer then
    io.stderr:write ("Warning: cannot run tests with buffer subjects\n")
  elseif libname == "rex_posix" and not lib.flags ().STARTEND then
    newmembuffer = nil
    io.stderr:write ("Cannot run posix tests with buffer subjects without REG_STARTEND\n")
  end

  local failures = 0
  for _, set in ipairs (sets) do
    if verbose then
      print (set.Name or "Unnamed set")
    end
    local failed = luatest.test_set (set, lib, newmembuffer)
    if verbose then
      for _, entry in ipairs (failed) do
        print (" Test " .. entry.i)
        luatest.print_results (entry, " ")
      end
    end
    failures = failures + #failed
  end
  if verbose then
    print ""
  end
  return failures
end
57 | ||
58 | local avail_tests = { | |
59 | posix = { lib = "rex_posix", "common_sets", "posix_sets" }, | |
60 | gnu = { lib = "rex_gnu", "common_sets", "emacs_sets", "gnu_sets" }, | |
61 | oniguruma = { lib = "rex_onig", "common_sets", "oniguruma_sets", }, | |
62 | pcre = { lib = "rex_pcre", "common_sets", "pcre_sets", "pcre_sets2", }, | |
63 | pcre2 = { lib = "rex_pcre2", "common_sets", "pcre_sets", "pcre_sets2", }, | |
64 | spencer = { lib = "rex_spencer", "common_sets", "posix_sets", "spencer_sets" }, | |
65 | tre = { lib = "rex_tre", "common_sets", "posix_sets", "spencer_sets", --[["tre_sets"]] }, | |
66 | } | |
67 | ||
-- Script body: parses the command-line arguments, optionally puts a library
-- directory in front of package.cpath, runs the selected test groups and
-- prints the total number of failures.
--   -v      verbose output
--   -a      create buffer subjects with the "alien" module
--   -d<dir> directory searched first for the tested libraries
--   <name>  a key of avail_tests; at least one is required
do
  local verbose, use_alien, dir = false, nil, nil
  local tests, nerr = {}, 0

  -- check arguments
  local argc = select ("#", ...)
  for i = 1, argc do
    local arg = select (i, ...)
    if arg == "-v" then
      verbose = true
    elseif arg == "-a" then
      use_alien = true
    elseif arg:sub(1,2) == "-d" then
      dir = arg:sub(3)
    elseif arg:sub(1,1) ~= "-" and avail_tests[arg] then
      tests[#tests+1] = avail_tests[arg]
    else
      -- unknown option or unknown test group
      error ("invalid argument: [" .. arg .. "]")
    end
  end
  assert (#tests > 0, "no library specified")

  -- give priority to libraries located in the specified directory
  if dir then
    dir = string.gsub (dir, "[/\\]+$", "") -- strip trailing path separators
    for _, ext in ipairs {"dll", "so", "dylib"} do
      -- the first extension that package.cpath already mentions is used
      if package.cpath:match ("%?%." .. ext) then
        local prefix = dir .. "/?." .. ext .. ";"
        local already_first = package.cpath:sub(1, #prefix) == prefix
        if not already_first then
          package.cpath = prefix .. package.cpath
        end
        break
      end
    end
  end

  -- do tests
  for _, test in ipairs (tests) do
    package.loaded[test.lib] = nil -- to force-reload the tested library
    for _, setfile in ipairs (test) do
      local failed = test_library (test.lib, setfile, verbose, use_alien)
      nerr = nerr + failed
    end
  end
  print ("Total number of failures: " .. nerr)
end
0 | -- See Copyright Notice in the file LICENSE | |
1 | ||
2 | local luatest = require "luatest" | |
3 | local N = luatest.NT | |
4 | local unpack = unpack or table.unpack | |
5 | ||
6 | local function norm(a) return a==nil and N or a end | |
7 | ||
-- Returns lib.gsub when the library provides it. Otherwise returns a stand-in
-- taking the same (subj, pattern, repl, n) arguments, which builds an object
-- with lib.new (pattern) and calls that object's gsub method.
local function get_gsub (lib)
  local native = lib.gsub
  if native then
    return native
  end
  return function (subj, pattern, repl, n)
    local compiled = lib.new (pattern)
    return compiled:gsub (subj, repl, n)
  end
end
14 | ||
-- Builds gsub test set 1: a single case whose subject contains a NUL byte.
local function set_f_gsub1 (lib, flg)
  return {
    Name = "Function gsub, set1",
    Func = get_gsub (lib),
    --{ s, p, f, n, res1, res2, res3 },
    { {"a\0c", ".", "#" }, {"###", 3, 3} }, -- subj contains nuls
  }
end
24 | ||
-- Build the test set for the library function `find`, using subjects (and,
-- in the second case, a pattern) that contain an embedded NUL character.
--   lib : the regex library under test
--   flg : table whose PEND field is passed as the compilation flag
local function set_f_find (lib, flg)
  local with_nul = "a\0c"
  return {
    Name = "Function find",
    Func = lib.find,
    -- each case: { {subj, patt, st, cf, ef}, { expected results } }
    { { with_nul, ".+" },                  { 1, 3 } }, -- NUL in the subject
    { { with_nul, with_nul, N, flg.PEND }, { 1, 3 } }, -- NUL in subject and pattern
  }
end
34 | ||
-- Build the test set for the library function `match`, using subjects (and,
-- in the second case, a pattern) that contain an embedded NUL character.
--   lib : the regex library under test
--   flg : table whose PEND field is passed as the compilation flag
local function set_f_match (lib, flg)
  local with_nul = "a\0c"
  return {
    Name = "Function match",
    Func = lib.match,
    -- each case: { {subj, patt, st, cf, ef}, { expected results } }
    { { with_nul, ".+" },                  { with_nul } }, -- NUL in the subject
    { { with_nul, with_nul, N, flg.PEND }, { with_nul } }, -- NUL in subject and pattern
  }
end
44 | ||
-- Build the test set for the library function `gmatch` on a subject that
-- contains an embedded NUL character.
--   lib : the regex library under test
--   flg : not used by this set
local function set_f_gmatch (lib, flg)
  -- Driver around gmatch (s, p, [cf], [ef]): collects the first two values
  -- of every iteration (nil replaced through norm) and returns the collected
  -- pairs as separate values. Iteration stops after 10 results at the latest.
  local function test_gmatch (subj, patt)
    local results = {}
    for first, second in lib.gmatch (subj, patt) do
      results[#results + 1] = { norm (first), norm (second) }
      if #results == 10 then break end
    end
    return unpack (results)
  end

  return {
    Name = "Function gmatch",
    Func = test_gmatch,
    -- each case: { {subj, patt}, { expected results } }
    { { "a\0c", "." }, { {"a", N}, {"\0", N}, {"c", N} } }, -- nuls in subj
  }
end
63 | ||
-- Build the test set for the library function `split` on a subject that
-- contains an embedded NUL character.
--   lib : the regex library under test
--   flg : not used by this set
local function set_f_split (lib, flg)
  -- Driver around split (s, p, [cf], [ef]): collects the first three values
  -- of every iteration (nil replaced through norm) and returns the collected
  -- triples as separate values. Iteration stops after 10 results at the latest.
  local function test_split (subj, patt)
    local results = {}
    for first, second, third in lib.split (subj, patt) do
      results[#results + 1] = { norm (first), norm (second), norm (third) }
      if #results == 10 then break end
    end
    return unpack (results)
  end

  local sep = ","
  return {
    Name = "Function split",
    Func = test_split,
    -- each case: { {subj, patt}, { expected results } }
    { { "a,\0,c", sep },
      { {"a", sep, N}, {"\0", sep, N}, {"c", N, N} } }, -- nuls in subj
  }
end
82 | ||
-- Build the test set for the method `exec`, using a subject (and, in the
-- second case, a pattern) that contains an embedded NUL character.
--   lib : the regex library under test (not read by this set)
--   flg : table whose PEND field is passed as the compilation flag
local function set_m_exec (lib, flg)
  local with_nul = "a\0c"
  return {
    Name = "Method exec",
    Method = "exec",
    -- each case: { {patt, cf}, {subj, st, ef}, { expected results } }
    { { ".+" },               { with_nul }, { 1, 3, {} } }, -- NUL in the subject
    { { with_nul, flg.PEND }, { with_nul }, { 1, 3, {} } }, -- NUL in subject and pattern
  }
end
92 | ||
-- Build the test set for the method `tfind`, using a subject (and, in the
-- second case, a pattern) that contains an embedded NUL character.
--   lib : the regex library under test (not read by this set)
--   flg : table whose PEND field is passed as the compilation flag
local function set_m_tfind (lib, flg)
  local with_nul = "a\0c"
  return {
    Name = "Method tfind",
    Method = "tfind",
    -- each case: { {patt, cf}, {subj, st, ef}, { expected results } }
    { { ".+" },               { with_nul }, { 1, 3, {} } }, -- NUL in the subject
    { { with_nul, flg.PEND }, { with_nul }, { 1, 3, {} } }, -- NUL in subject and pattern
  }
end
102 | ||
-- Module value: a function that loads the library named `libname`, obtains
-- its flags through lib.flags () and returns the list of test sets built for
-- that library.
-- NOTE: set_f_split is defined in this file but is not part of the list.
return function (libname)
  local lib = require (libname)
  local flags = lib.flags ()
  local builders = {
    set_f_match,
    set_f_find,
    set_f_gmatch,
    set_f_gsub1,
    set_m_exec,
    set_m_tfind,
  }
  local sets = {}
  for index, build in ipairs (builders) do
    sets[index] = build (lib, flags)
  end
  return sets
end
115 |
0 | -- See Copyright Notice in the file LICENSE | |
1 | ||
2 | -- This file should contain only test sets that behave identically | |
3 | -- when being run with pcre or posix regex libraries. | |
4 | ||
5 | local luatest = require "luatest" | |
6 | local N = luatest.NT | |
7 | local unpack = unpack or table.unpack | |
8 | ||
-- Append a NUL byte after every character of `s` and return the new string
-- (only the string; the substitution count from string.gsub is dropped).
local L = function (s)
  local widened = string.gsub (s, ".", "%0\0")
  return widened
end
10 | ||
-- Map nil to the luatest.NT marker (N); every other value, false included,
-- is handed back unchanged.
local function norm (value)
  if value == nil then
    return N or value
  end
  return value
end
12 | ||
-- Return the wgsub function to be tested for the library `lib`.
-- When the library exports `wgsub` it is returned as is; otherwise a wrapper
-- is returned that compiles the pattern with lib.wnew and calls the `wgsub`
-- method of the resulting object.
local function get_wgsub (lib)
  local direct = lib.wgsub
  if direct then
    return direct
  end
  return function (subj, pattern, repl, n)
    local compiled = lib.wnew (pattern)
    return compiled:wgsub (subj, repl, n)
  end
end
19 | ||
-- Build the test set for the library function `wgmatch`.
--   lib : the regex library under test (lib.wgmatch and lib.wnew are used)
--   flg : not used by this set
local function set_f_wgmatch (lib, flg)
  -- Driver around wgmatch (s, p, [cf], [ef]): collects the first two values
  -- of every iteration (nil replaced through norm) and returns the collected
  -- pairs as separate values. Iteration stops after 10 results at the latest.
  local function test_wgmatch (subj, patt)
    local results = {}
    for first, second in lib.wgmatch (subj, patt) do
      results[#results + 1] = { norm (first), norm (second) }
      if #results == 10 then break end
    end
    return unpack (results)
  end

  local a, d = L"a", L"d"
  return {
    Name = "Function wgmatch",
    Func = test_wgmatch,
    -- each case: { {subj, patt}, { expected results } }
    { { L"ab", lib.wnew (L".") },          { {a, N}, {L"b", N} } },
    { { (L"abcd"):rep (3), L"(.)b.(d)" },  { {a, d}, {a, d}, {a, d} } },
    { { L"abcd", L".*" },                  { {L"abcd", N}, {L"", N} } }, -- zero-length match
    { { L"abc", L"^." },                   { {a, N} } },                 -- anchored pattern
  }
end
41 | ||
-- Build the test set for the library function `wsplit`.
--   lib : the regex library under test (lib.wsplit and lib.wnew are used)
--   flg : not used by this set
local function set_f_wsplit (lib, flg)
  -- Driver around wsplit (s, p, [cf], [ef]): collects the first three values
  -- of every iteration (nil replaced through norm) and returns the collected
  -- triples as separate values. Iteration stops after 10 results at the latest.
  local function test_wsplit (subj, patt)
    local results = {}
    for first, second, third in lib.wsplit (subj, patt) do
      results[#results + 1] = { norm (first), norm (second), norm (third) }
      if #results == 10 then break end
    end
    return unpack (results)
  end

  local sep, empty = L",", L""
  local a, b = L"a", L"b"
  return {
    Name = "Function wsplit",
    Func = test_wsplit,
    -- each case: { {subj, patt}, { expected results } }
    { { L"ab", lib.wnew (L",") },  { {L"ab", N, N} } },
    { { L"ab", sep },              { {L"ab", N, N} } },
    { { L",", sep },               { {empty, sep, N}, {empty, N, N} } },
    { { L",,", sep },              { {empty, sep, N}, {empty, sep, N}, {empty, N, N} } },
    { { L"a,b", sep },             { {a, sep, N}, {b, N, N} } },
    { { L",a,b", sep },            { {empty, sep, N}, {a, sep, N}, {b, N, N} } },
    { { L"a,b,", sep },            { {a, sep, N}, {b, sep, N}, {empty, N, N} } },
    { { L"a,,b", sep },            { {a, sep, N}, {empty, sep, N}, {b, N, N} } },
    { { L"ab<78>c", L"<(.)(.)>" }, { {L"ab", L"7", L"8"}, {L"c", N, N} } },
    { { L"abc", L"^." },           { {empty, a, N}, {L"bc", N, N} } }, -- anchored pattern
    { { L"abc", L"^" },            { {empty, empty, N}, {L"abc", N, N} } },
    -- Cases kept disabled, as in the original set:
--  { {"abc", "$"},                {{"abc","",N}, {"",N,N},             } },
--  { {"abc", "^|$"},              {{"", "", N},  {"abc","",N},{"",N,N},} },
  }
end
72 | ||
-- Build the test set for the library function `wfind`.
--   lib : the regex library under test (lib.wfind and lib.wnew are used)
--   flg : not used by this set
local function set_f_wfind (lib, flg)
  local abcd, any1 = L"abcd", L".+"
  return {
    Name = "Function wfind",
    Func = lib.wfind,
    -- each case: { {subj, patt, st}, { expected results } }
    { { abcd, lib.wnew (L".+") }, { 1, 4 } },             -- compiled pattern
    { { abcd, any1 },             { 1, 4 } },             -- no start offset
    { { abcd, any1, 2 },          { 2, 4 } },             -- positive st
    { { abcd, any1, -2 },         { 3, 4 } },             -- negative st
    { { abcd, L".*" },            { 1, 4 } },             -- no start offset
    { { L"abc", L"bc" },          { 2, 3 } },             -- no start offset
    { { abcd, L"(.)b.(d)" },      { 1, 4, L"a", L"d" } }, -- captures
  }
end
87 | ||
-- Build the test set for the library function `wmatch`.
--   lib : the regex library under test (lib.wmatch and lib.wnew are used)
--   flg : not used by this set
local function set_f_wmatch (lib, flg)
  local abcd, any1 = L"abcd", L".+"
  return {
    Name = "Function wmatch",
    Func = lib.wmatch,
    -- each case: { {subj, patt, st}, { expected results } }
    { { abcd, lib.wnew (L".+") }, { abcd } },       -- compiled pattern
    { { abcd, any1 },             { abcd } },       -- no start offset
    { { abcd, any1, 2 },          { L"bcd" } },     -- positive st
    { { abcd, any1, -2 },         { L"cd" } },      -- negative st
    { { abcd, L".*" },            { abcd } },       -- no start offset
    { { L"abc", L"bc" },          { L"bc" } },      -- no start offset
    { { abcd, L"(.)b.(d)" },      { L"a", L"d" } }, -- captures
  }
end
102 | ||
-- Build the test set for the method `wexec`.
--   lib, flg : not read by this set
local function set_m_wexec (lib, flg)
  local abcd, any1 = L"abcd", L".+"
  return {
    Name = "Method wexec",
    Method = "wexec",
    -- each case: { {patt}, {subj, st}, { expected results } }
    { { any1 },          { abcd },           { 1, 4, {} } },           -- no start offset
    { { any1 },          { abcd, 2 },        { 2, 4, {} } },           -- positive st
    { { any1 },          { abcd, -2 },       { 3, 4, {} } },           -- negative st
    { { L".*" },         { abcd },           { 1, 4, {} } },           -- no start offset
    { { L"bc" },         { L"abc" },         { 2, 3, {} } },           -- no start offset
    { { L"(.)b.(d)" },   { abcd },           { 1, 4, {1, 1, 4, 4} } }, -- captures
    { { L"(a+)6+(b+)" }, { L"Taa66bbT", 2 }, { 2, 7, {2, 3, 6, 7} } }, -- st + captures
  }
end
117 | ||
-- Build the test set for the method `waexec`. Every call passes an empty
-- table as the second method argument, after the subject.
--   lib, flg : not read by this set
local function set_m_waexec (lib, flg)
  local abcd, any1 = L"abcd", L".+"
  return {
    Name = "Method waexec",
    Method = "waexec",
    -- each case: { {patt}, {subj, {}, st}, { expected results } }
    { { any1 },          { abcd, {} },           { 1, 4, {} } },           -- no start offset
    { { any1 },          { abcd, {}, 2 },        { 2, 4, {} } },           -- positive st
    { { any1 },          { abcd, {}, -2 },       { 3, 4, {} } },           -- negative st
    { { L".*" },         { abcd, {} },           { 1, 4, {} } },           -- no start offset
    { { L"bc" },         { L"abc", {} },         { 2, 3, {} } },           -- no start offset
    { { L"(.)b.(d)" },   { abcd, {} },           { 1, 4, {1, 1, 4, 4} } }, -- captures
    { { L"(a+)6+(b+)" }, { L"Taa66bbT", {}, 2 }, { 2, 7, {2, 3, 6, 7} } }, -- st + captures
  }
end
132 | ||
-- Build the test set for the method `wtfind`.
--   lib, flg : not read by this set
local function set_m_wtfind (lib, flg)
  local abcd, any1 = L"abcd", L".+"
  return {
    Name = "Method wtfind",
    Method = "wtfind",
    -- each case: { {patt}, {subj, st}, { expected results } }
    { { any1 },        { abcd },     { 1, 4, {} } },             -- no start offset
    { { any1 },        { abcd, 2 },  { 2, 4, {} } },             -- positive st
    { { any1 },        { abcd, -2 }, { 3, 4, {} } },             -- negative st
    { { L".*" },       { abcd },     { 1, 4, {} } },             -- no start offset
    { { L"bc" },       { L"abc" },   { 2, 3, {} } },             -- no start offset
    { { L"(.)b.(d)" }, { abcd },     { 1, 4, {L"a", L"d"} } },   -- captures
  }
end
146 | ||
-- Build the test set for the method `watfind`. Every call passes an empty
-- table as the second method argument, after the subject.
--   lib, flg : not read by this set
local function set_m_watfind (lib, flg)
  local abcd, any1 = L"abcd", L".+"
  return {
    Name = "Method watfind",
    Method = "watfind",
    -- each case: { {patt}, {subj, {}, st}, { expected results } }
    { { any1 },        { abcd, {} },     { 1, 4, {} } },           -- no start offset
    { { any1 },        { abcd, {}, 2 },  { 2, 4, {} } },           -- positive st
    { { any1 },        { abcd, {}, -2 }, { 3, 4, {} } },           -- negative st
    { { L".*" },       { abcd, {} },     { 1, 4, {} } },           -- no start offset
    { { L"bc" },       { L"abc", {} },   { 2, 3, {} } },           -- no start offset
    { { L"(.)b.(d)" }, { abcd, {} },     { 1, 4, {L"a", L"d"} } }, -- captures
  }
end
160 | ||
-- Build the test set for the method `wfind`.
--   lib, flg : not read by this set
local function set_m_wfind (lib, flg)
  local abcd, any1 = L"abcd", L".+"
  return {
    Name = "Method wfind",
    Method = "wfind",
    -- each case: { {patt}, {subj, st}, { expected results } }
    { { any1 },        { abcd },     { 1, 4 } },             -- no start offset
    { { any1 },        { abcd, 2 },  { 2, 4 } },             -- positive st
    { { any1 },        { abcd, -2 }, { 3, 4 } },             -- negative st
    { { L".*" },       { abcd },     { 1, 4 } },             -- no start offset
    { { L"bc" },       { L"abc" },   { 2, 3 } },             -- no start offset
    { { L"(.)b.(d)" }, { abcd },     { 1, 4, L"a", L"d" } }, -- captures
  }
end
174 | ||
-- Build the test set for the method `wmatch`.
--   lib, flg : not read by this set
local function set_m_wmatch (lib, flg)
  local abcd, any1 = L"abcd", L".+"
  return {
    Name = "Method wmatch",
    Method = "wmatch",
    -- each case: { {patt}, {subj, st}, { expected results } }
    { { any1 },        { abcd },     { abcd } },       -- no start offset
    { { any1 },        { abcd, 2 },  { L"bcd" } },     -- positive st
    { { any1 },        { abcd, -2 }, { L"cd" } },      -- negative st
    { { L".*" },       { abcd },     { abcd } },       -- no start offset
    { { L"bc" },       { L"abc" },   { L"bc" } },      -- no start offset
    { { L"(.)b.(d)" }, { abcd },     { L"a", L"d" } }, -- captures
  }
end
188 | ||
-- Build the test set "Function wgsub, set1": the effect of the count
-- argument `n` with an empty and with a non-empty replacement string.
--   lib : the regex library under test (lib.wnew and get_wgsub are used)
--   flg : not used by this set
local function set_f_wgsub1 (lib, flg)
  local subj, pat = L"abcdef", L"[abef]+"
  local cpat = lib.wnew (pat)
  local empty, hash = L"", L"#"
  return {
    Name = "Function wgsub, set1",
    Func = get_wgsub (lib),
    -- each case: { {s, p, f, n}, {res1, res2, res3} }
    -- "n" combined with an empty replacement
    { { subj, cpat, empty, 0 },  { subj, 0, 0 } },
    { { subj, pat,  empty, 0 },  { subj, 0, 0 } },
    { { subj, pat,  empty, -1 }, { subj, 0, 0 } },
    { { subj, pat,  empty, 1 },  { L"cdef", 1, 1 } },
    { { subj, pat,  empty, 2 },  { L"cd", 2, 2 } },
    { { subj, pat,  empty, 3 },  { L"cd", 2, 2 } },
    { { subj, pat,  empty },     { L"cd", 2, 2 } },
    -- "n" combined with a non-empty replacement
    { { subj, pat,  hash, 0 },   { subj, 0, 0 } },
    { { subj, pat,  hash, 1 },   { L"#cdef", 1, 1 } },
    { { subj, pat,  hash, 2 },   { L"#cd#", 2, 2 } },
    { { subj, pat,  hash, 3 },   { L"#cd#", 2, 2 } },
    { { subj, pat,  hash },      { L"#cd#", 2, 2 } },
    -- anchored pattern
    { { L"abc", L"^.", hash },   { L"#bc", 1, 1 } },
  }
end
211 | ||
-- Build the test set "Function wgsub, set2": handling of '%' escapes and
-- capture references inside a replacement string.
--   lib : the regex library under test (passed to get_wgsub)
--   flg : not used by this set
local function set_f_wgsub2 (lib, flg)
  local subj, pat = L"abc", L"([ac])"

  -- Every case of this set uses the same subject and pattern and expects the
  -- counts 2, 2; only the replacement and the resulting string vary.
  local function case (repl, result)
    return { { subj, pat, L (repl) }, { L (result), 2, 2 } }
  end

  return {
    Name = "Function wgsub, set2",
    Func = get_wgsub (lib),
    -- case (f, res1)
    case ("<%1>",   "<a>b<c>"),  -- non-escaped chars in f
    case ("%<%1%>", "<a>b<c>"),  -- escaped chars in f
    case ("",       "b"),        -- empty replace
    case ("1",      "1b1"),      -- odd and even numbers of %'s in f
    case ("%1",     "abc"),
    case ("%%1",    "%1b%1"),
    case ("%%%1",   "%ab%c"),
    case ("%%%%1",  "%%1b%%1"),
    case ("%%%%%1", "%%ab%%c"),
  }
end
229 | ||
-- Build the test set "Function wgsub, set3": valid and invalid capture
-- indices in a replacement string.
--   lib : the regex library under test (passed to get_wgsub)
--   flg : not used by this set
local function set_f_wgsub3 (lib, flg)
  local abc = L"abc"
  local ref1 = L"%1"
  return {
    Name = "Function wgsub, set3",
    Func = get_wgsub (lib),
    -- each case: { {s, p, f}, {res1, res2, res3} } or { {s, p, f}, message }
    { { abc, L"a",    L"%0" }, { abc, 1, 1 } },
    { { abc, L"a",    ref1 },  { abc, 1, 1 } },
    { { abc, L"[ac]", ref1 },  { abc, 2, 2 } },
    { { abc, L"(a)",  ref1 },  { abc, 1, 1 } },
    { { abc, L"(a)",  L"%2" }, "invalid capture index" },
  }
end
242 | ||
-- Build the test set "Function wgsub, set4": several pattern forms, each
-- replaced by "#".
--   lib : the regex library under test (passed to get_wgsub)
--   flg : not used by this set
local function set_f_wgsub4 (lib, flg)
  local hash = L"#"

  -- In every case of this set the two counts are equal and the replacement
  -- is "#", so a case is described by subject, pattern, result and count.
  local function case (subj, patt, result, count)
    return { { L (subj), L (patt), hash }, { L (result), count, count } }
  end

  return {
    Name = "Function wgsub, set4",
    Func = get_wgsub (lib),
    -- case (s, p, res1, res2 == res3)
    case ("a2c3",     ".",                 "####",    4), -- test .
    case ("a2c3",     ".+",                "#",       1), -- test .+
    case ("a2c3",     ".*",                "##",      2), -- test .*
    case ("/* */ */", "\\/\\*(.*)\\*\\/",  "#",       1),
    case ("a2c3",     "[0-9]",             "a#c#",    2), -- test %d
    case ("a2c3",     "[^0-9]",            "#2#3",    2), -- test %D
    case ("a \t\nb",  "[ \t\n]",           "a###b",   3), -- test %s
    case ("a \t\nb",  "[^ \t\n]",          "# \t\n#", 2), -- test %S
  }
end
258 | ||
-- Build the test set "Function wgsub, set5": replacement given as a function.
--   lib : the regex library under test (passed to get_wgsub)
--   flg : not used by this set
local function set_f_wgsub5 (lib, flg)
  -- Replacement functions exercised by the set.
  local function returns_nothing () end
  local function returns_hash () return L"#" end           -- ignores arguments
  local function joins_args (...)                          -- "normal"
    return table.concat ({...}, L",")
  end
  local function returns_table () return {} end            -- invalid return type
  local function returns_7_and_a () return L"7", L"a" end  -- 2-nd return is "a"
  local function returns_7_and_break ()                    -- 2-nd return is "break"
    return L"7", "break"
  end

  local subj = L"a2c3"
  local two_caps, no_caps, zs = L"a(.)c(.)", L"a.c.", L"z*"
  local abc, dot = L"abc", L"."
  return {
    Name = "Function wgsub, set5",
    Func = get_wgsub (lib),
    -- each case: { {s, p, f}, {res1, res2, res3} } or { {s, p, f}, message }
    { { subj, two_caps, returns_nothing },    { subj, 1, 0 } },
    { { subj, two_caps, returns_hash },       { L"#", 1, 1 } },
    { { subj, two_caps, joins_args },         { L"2,3", 1, 1 } },
    { { subj, no_caps,  joins_args },         { subj, 1, 1 } },
    { { subj, zs,       returns_nothing },    { subj, 5, 0 } },
    { { subj, zs,       returns_hash },       { L"#a#2#c#3#", 5, 5 } },
    { { subj, zs,       joins_args },         { subj, 5, 5 } },
    { { subj, subj,     returns_table },      "invalid return type" },
    { { abc,  dot,      returns_7_and_a },    { L"777", 3, 3 } },
    { { abc,  dot,      returns_7_and_break }, { L"777", 3, 3 } },
  }
end
283 | ||
-- Build the test set "Function wgsub, set6": replacement given as a table.
--   lib : the regex library under test (passed to get_wgsub)
--   flg : not used by this set
local function set_f_wgsub6 (lib, flg)
  local key = L"2"
  local no_entry   = {}              -- no entries at all
  local num_entry  = { [key] = 56 }  -- numeric value under the key L"2"
  local tab_entry  = { [key] = {} }  -- table value under the key L"2"
  local subj = L"a2c3"
  local with_caps, without_caps = L"a(.)c(.)", L"a.c."
  return {
    Name = "Function wgsub, set6",
    Func = get_wgsub (lib),
    -- each case: { {s, p, f}, {res1, res2, res3} } or { {s, p, f}, message }
    { { subj, with_caps,    no_entry },  { subj, 1, 0 } },
    { { subj, with_caps,    num_entry }, { "56", 1, 1 } },
    { { subj, with_caps,    tab_entry }, "invalid replacement type" },
    { { subj, without_caps, no_entry },  { subj, 1, 0 } },
    { { subj, without_caps, num_entry }, { subj, 1, 0 } },
    { { subj, without_caps, tab_entry }, { subj, 1, 0 } },
  }
end
299 | ||
-- Build the test set "Function wgsub, set8": the 4-th argument `n` given as
-- a function.
--   lib : the regex library under test (passed to get_wgsub)
--   flg : not used by this set
local function set_f_wgsub8 (lib, flg)
  local subj, patt, repl = L"abcdef", L"..", L"*"

  -- All cases share subject, pattern and replacement; they differ only in
  -- the function passed as `n` and in the expected results.
  local function case (n_func, expected)
    return { { subj, patt, repl, n_func }, expected }
  end

  return {
    Name = "Function wgsub, set8",
    Func = get_wgsub (lib),
    -- case (n, {res1, res2, res3})
    case (function () end,                { L"abcdef", 3, 0 }),
    case (function () return nil end,     { L"abcdef", 3, 0 }),
    case (function () return false end,   { L"abcdef", 3, 0 }),
    case (function () return true end,    { L"***", 3, 3 }),
    case (function () return {} end,      { L"***", 3, 3 }),
    case (function () return L"#" end,    { L"###", 3, 3 }),
    case (function () return 57 end,      { "575757", 3, 3 }),
    case (function (from) return from end,          { "135", 3, 3 }),
    case (function (from, to) return to end,        { "246", 3, 3 }),
    case (function (from, to, rep) return rep end,  { L"***", 3, 3 }),
    case (function (from, to, rep) return rep..to..from end,
          { L"*".."21"..L"*".."43"..L"*".."65", 3, 3 }),
    case (function () return nil end,         { L"abcdef", 3, 0 }),
    case (function () return nil, nil end,    { L"abcdef", 3, 0 }),
    case (function () return nil, false end,  { L"abcdef", 3, 0 }),
    case (function () return nil, true end,   { L"ab**", 3, 2 }),
    case (function () return true, true end,  { L"***", 3, 3 }),
    case (function () return nil, 0 end,      { L"abcdef", 1, 0 }),
    case (function () return true, 0 end,     { L"*cdef", 1, 1 }),
    case (function () return nil, 1 end,      { L"ab*ef", 2, 1 }),
    case (function () return true, 1 end,     { L"**ef", 2, 2 }),
  }
end
330 | ||
-- Module value: a function that loads the library named `libname` and
-- returns the list of wide-character test sets built for it.
-- NOTE: lib.new is overwritten with lib.wnew on the loaded library table,
-- which stays in effect for any later user of that table.
return function (libname)
  local lib = require (libname)
  lib.new = lib.wnew
  local builders = {
    set_f_wgmatch,
    set_f_wsplit,
    set_f_wfind,
    set_f_wmatch,
    set_m_wexec,
    set_m_waexec,
    set_m_wtfind,
    set_m_watfind,
    set_m_wfind,
    set_m_wmatch,
    set_f_wgsub1,
    set_f_wgsub2,
    set_f_wgsub3,
    set_f_wgsub4,
    set_f_wgsub5,
    set_f_wgsub6,
    set_f_wgsub8,
  }
  local sets = {}
  for index, build in ipairs (builders) do
    sets[index] = build (lib)
  end
  return sets
end
# Makefile for lrexlib
#
# Runs every per-binding makefile listed in MKFILES for the requested goal.

.PHONY: all build test install clean

# rex_gnu.mak and rex_spencer.mak are left out when DIRBIT is 64.
ifeq ($(DIRBIT),64)
  MKFILES = rex_onig.mak rex_pcre.mak rex_pcre2.mak rex_tre.mak
else
  MKFILES = rex_gnu.mak rex_onig.mak rex_pcre.mak rex_pcre2.mak \
            rex_spencer.mak rex_tre.mak
endif

# Command prefix: invoke $(MAKE) -f <makefile> for each entry of MKFILES;
# a goal name may be appended after $(LOOP).
LOOP = @for %%d in ($(MKFILES)) do $(MAKE) -f %%d

all: build test

build:
	$(LOOP)

test:
	$(LOOP) test

install:
	$(LOOP) install

clean:
	del *.o *.def *.dll
# Use with GNU Make.
#
# Shared build rules, included by the per-binding makefiles (rex_*.mak).
# The including makefile is expected to define PROJECT, OBJ, PROJDIR,
# TESTNAME, MYINCS and MYLIBS, and may define MYCFLAGS.

# Lrexlib version
VERSION = 2.9.1

# User Settings ------------------------------------------------------------

# Target Lua version (51 for Lua 5.1, etc.)
LUAVERSION = 51
# Dotted form of LUAVERSION (51 -> 5.1). Only the leading 5 gets a dot;
# $(subst 5,5.,...) would rewrite every 5 and turn 55 into "5.5.".
LUADOTVERSION = $(patsubst 5%,5.%,$(LUAVERSION))

# Target bitness: 32 or 64
DIRBIT = 32
# GCC location (GCC32 and GCC64 are defined environment variables)
PATH = $(GCC$(DIRBIT))

#  INSTALLPATH : Path to install the built DLL.
#  LUADLL      : Lua DLL to link to (.dll should be omitted).
#  LUAEXE      : Lua interpreter.
#  LUAINC      : Path of Lua include files.
#  LIBPATH     : Path of lua51.dll, lua52.dll, pcre.dll, etc.

INSTALLPATH = S:\Progr\Exe\lib$(DIRBIT)\lua\$(LUADOTVERSION)
LUADLL      = lua$(LUAVERSION)
LUAINC      = $(PATH_SYSTEM)\include\lua\$(LUADOTVERSION)
LIBPATH     = $(CROOT)\Programs\EXE$(DIRBIT)

# For Lua 5.1 the interpreter is lua.exe and REX_CREATEGLOBALVAR is defined;
# for other versions the interpreter name carries the version number.
ifeq ($(LUAVERSION),51)
  LUAEXE = $(LIBPATH)\lua.exe
  CREATEGLOBAL = -DREX_CREATEGLOBALVAR
else
  LUAEXE = $(LIBPATH)\lua$(LUAVERSION).exe
endif
# --------------------------------------------------------------------------

BIN        = $(PROJECT).dll
BININSTALL = $(INSTALLPATH)\$(BIN)
CC         = gcc
AR         = ar rcu
RANLIB     = ranlib
CFLAGS     = -W -Wall -O2 $(INCS) -DREX_OPENLIB=luaopen_$(PROJECT) \
  -DREX_LIBNAME=\"$(PROJECT)\" -DVERSION=\"$(VERSION)\" \
  -m$(DIRBIT) $(CREATEGLOBAL) $(MYCFLAGS)
DEFFILE    = $(PROJECT).def
EXPORTED   = luaopen_$(PROJECT)
INCS       = -I$(LUAINC) $(MYINCS)
LIBS       = -l$(LUADLL) -m$(DIRBIT) -s $(MYLIBS)
SRCPATH    = ..\..\src
TESTPATH   = ..\..\test

.PHONY: all install test vtest clean

# Sources and headers are searched in the common source directory and in the
# directory of the binding being built.
vpath %.c $(SRCPATH);$(SRCPATH)\$(PROJDIR)
vpath %.h $(SRCPATH);$(SRCPATH)\$(PROJDIR)

all: $(BIN)

clean:
	del $(OBJ) $(BIN) $(DEFFILE)

install: $(BININSTALL)

# Run the test suite from the test directory; -d passes the build directory.
test:
	cd $(TESTPATH) && $(LUAEXE) runtest.lua $(TESTNAME) -d$(CURDIR)

# Same as "test", with -v added to the runtest.lua arguments.
vtest:
	cd $(TESTPATH) && $(LUAEXE) runtest.lua -v $(TESTNAME) -d$(CURDIR)

$(BIN): $(OBJ) $(DEFFILE)
	$(CC) $(DEFFILE) $(OBJ) -L$(LIBPATH) $(LIBS) -o $@ -shared

lib$(PROJECT)$(LUAVERSION).a: $(OBJ)
	$(AR) $@ $?
	$(RANLIB) $@

# Write the module-definition file: an EXPORTS line followed by one line per
# exported symbol.
$(DEFFILE):
	echo EXPORTS > $@
	for %%d in ($(EXPORTED)) do echo %%d>> $@

$(BININSTALL): $(BIN)
	copy /Y $< $@
# Documentation Makefile
#
# Forwards the goals "all" and "clean" to the makefile in ..\..\doc, passing
# the tool settings below on the command line.

.PHONY: all clean

APP = rst2html.py
CP  = "copy /y"
RM  = del
IDX = ..\README.rst

# NAME=value pairs handed to the sub-make, one for each variable above.
ALLVAR = $(foreach v,APP CP RM IDX,$(v)=$($(v)))

all clean:
	cd ..\..\doc && $(MAKE) $(ALLVAR) $@
# Project: rex_gnu
#
# Settings of the rex_gnu binding; the build rules come from _mingw.mak.

# User Settings ------------------------------------------------------------
# Directory of the GNU include files.
REGEXINC = $(PATH_WORK)\system\include\gnuregex
# --------------------------------------------------------------------------

# Names: output module, source subdirectory, test name given to runtest.lua.
PROJECT  = rex_gnu
PROJDIR  = gnu
TESTNAME = gnu

# Build inputs: object files, extra include path, extra libraries.
OBJ    = lgnu.o common.o
MYINCS = -I$(REGEXINC)
MYLIBS = -lregex2

include _mingw.mak

# Header prerequisites of the object files.
common.o : common.h
lgnu.o   : common.h algo.h
# Project: rex_onig
#
# Settings of the rex_onig binding; the build rules come from _mingw.mak.

# User Settings ------------------------------------------------------------
# Directory of the Oniguruma include files.
REGEXINC = $(PATH_WORK)\system\include\oniguruma
# --------------------------------------------------------------------------

# Names: output module, source subdirectory, test name given to runtest.lua.
PROJECT  = rex_onig
PROJDIR  = oniguruma
TESTNAME = oniguruma

# Build inputs: object files, extra include path, extra link options.
OBJ    = lonig.o lonig_f.o common.o
MYINCS = -I$(REGEXINC)
MYLIBS = -lonig -Wl,--enable-auto-import

include _mingw.mak

# Header prerequisites of the object files.
common.o  : common.h
lonig_f.o : common.h
lonig.o   : common.h algo.h
# Project: rex_pcre
#
# Settings of the rex_pcre binding; the build rules come from _mingw.mak.

# User Settings ------------------------------------------------------------
# Directory of the PCRE include files.
REGEXINC = $(PATH_WORK)\system\include\pcre
# --------------------------------------------------------------------------

# Names: output module, source subdirectory, test name given to runtest.lua.
PROJECT  = rex_pcre
PROJDIR  = pcre
TESTNAME = pcre

# Build inputs: object files, extra include path, extra libraries.
OBJ    = lpcre.o lpcre_f.o common.o
MYINCS = -I$(REGEXINC)
MYLIBS = -lpcre

include _mingw.mak

# Header prerequisites of the object files.
common.o  : common.h
lpcre_f.o : common.h
lpcre.o   : common.h algo.h
# Project: rex_pcre2
#
# Settings of the rex_pcre2 binding; the build rules come from _mingw.mak.

# User Settings ------------------------------------------------------------
# Directory of the PCRE2 include files.
REGEXINC = $(PATH_WORK)\system\include\pcre2
# --------------------------------------------------------------------------

# Names: output module, source subdirectory, test name given to runtest.lua.
PROJECT  = rex_pcre2
PROJDIR  = pcre2
TESTNAME = pcre2

# Build inputs: object files, extra include path, extra compiler flags,
# extra libraries.
OBJ      = lpcre2.o lpcre2_f.o common.o
MYINCS   = -I$(REGEXINC)
MYCFLAGS = -DPCRE2_CODE_UNIT_WIDTH=8
MYLIBS   = -lpcre2

include _mingw.mak

# Header prerequisites of the object files.
common.o   : common.h
lpcre2_f.o : common.h
lpcre2.o   : common.h algo.h
# Project: rex_spencer
#
# Settings of the rex_spencer binding; the build rules come from _mingw.mak.

# User Settings ------------------------------------------------------------
# Directory of Spencer's include files.
REGEXINC = $(PATH_WORK)\system\include\rxspencer
# --------------------------------------------------------------------------

# Names: output module, source subdirectory, test name given to runtest.lua.
PROJECT  = rex_spencer
PROJDIR  = posix
TESTNAME = spencer

# Build inputs: object files, extra include path, extra libraries.
OBJ    = lposix.o common.o
MYINCS = -I$(REGEXINC)
MYLIBS = -lrxspencer

include _mingw.mak

# Header prerequisites of the object files.
common.o : common.h
lposix.o : common.h algo.h
# Project: rex_tre
#
# Settings of the rex_tre binding; the build rules come from _mingw.mak.

# User Settings ------------------------------------------------------------
# Directory of the TRE include files.
REGEXINC = $(PATH_WORK)\system\include
# --------------------------------------------------------------------------

# Names: output module, source subdirectory, test name given to runtest.lua.
PROJECT  = rex_tre
PROJDIR  = tre
TESTNAME = tre

# Build inputs: object files, extra include path, extra libraries.
OBJ    = ltre.o common.o
MYINCS = -I$(REGEXINC)
MYLIBS = -ltre

# Uncomment the following line to add wide-character functions (in alpha state).
# ADDWIDECHARFUNCS = 1
# When ADDWIDECHARFUNCS is defined, ltre_w.o is built as well and
# REX_ADDWIDECHARFUNCS is defined for the compiler.
ifdef ADDWIDECHARFUNCS
  OBJ += ltre_w.o
  MYCFLAGS += -DREX_ADDWIDECHARFUNCS
endif

include _mingw.mak

# Header prerequisites of the object files.
common.o : common.h
ltre.o   : common.h algo.h
ltre_w.o : common.h algo.h