[svn-upgrade] Integrating new upstream version, libjson-xs-perl (2.23)
Angel Abad Contreras
15 years ago
0 | 0 | Revision history for Perl extension JSON::XS |
1 | ||
2 | 2.23 Mon Sep 29 05:08:29 CEST 2008 | |
3 | - fix a compilation problem when perl is not using char * as, well, | |
4 | char *. | |
5 | - use PL_hexdigit in favour of rolling our own. | |
6 | ||
7 | 2.2222 Sun Jul 20 18:49:00 CEST 2008 | |
8 | - same game again, broken 5.10 finds yet another assertion | |
9 | failure, and the workaround causes additional runtime warnings. | |
10 | Work around the next assertion AND the warning. 5.10 seriously | |
11 | needs to adjust its attitude against working code. | |
12 | ||
13 | 2.222 Sat Jul 19 06:15:34 CEST 2008 | |
14 | - you work around one -DDEBUGGING assertion bug in perl 5.10 | |
15 | just to hit the next one. work around this one, too. | |
16 | ||
17 | 2.22 Tue Jul 15 13:26:51 CEST 2008 | |
18 | - allow higher nesting levels in incremental parser. | |
19 | - error out earlier in some cases in the incremental parser | |
20 | (as suggested by Yuval Kogman). | |
21 | - improve incr-parser test (Yuval Kogman). | |
22 | ||
23 | 2.21 Tue Jun 3 08:43:23 CEST 2008 | |
24 | - (hopefully) work around a perl 5.10 bug with -DDEBUGGING. | |
25 | - remove the experimental status of the incremental parser interface. | |
26 | - move =encoding around again, to avoid bugs with search.cpan.org. | |
27 | when can we finally have utf-8 in pod??? | |
28 | - add ->incr_reset method. | |
29 | ||
30 | 2.2 Wed Apr 16 20:37:25 CEST 2008 | |
31 | - lifted the log2 rounding restriction of max_depth and max_size. | |
32 | - make booleans mutable by creating a copy instead of handing out | |
33 | the same scalar (reported by pasha sadri). | |
34 | - added support for incremental json parsing (still EXPERIMENTAL). | |
35 | - implemented and added a json_xs command line utility that can convert | |
36 | from/to a number of serialisation formats - tell me if you need more. | |
37 | - implement allow_unknown/get_allow_unknown methods. | |
38 | - fixed documentation of max_depth w.r.t. higher and equal. | |
39 | - moved down =encoding directive a bit, too much breaks if it's the first | |
40 | pod directive :/. | |
41 | - removed documentation section on other modules, it became somewhat | |
42 | outdated and is nowadays mostly of historical interest. | |
43 | ||
44 | 2.1 Wed Mar 19 23:23:18 CET 2008 | |
45 | - update documentation here and there: add a large section | |
46 | about utf8/latin1/ascii flags, add a security consideration | |
47 | and extend and clarify the JSON and YAML section. | |
48 | - medium speed enhancements when encoding/decoding non-ascii chars. | |
49 | - minor speedup in number encoding case. | |
50 | - extend and clarify the section on incompatibilities | |
51 | between YAML and JSON. | |
52 | - switch to static inline from just inline when using gcc. | |
53 | - add =encoding utf-8 to the manpage, now that perl 5.10 supports it. | |
54 | - fix some issues with UV to JSON conversion of unknown impact. | |
55 | - published the yahoo locals search result used in benchmarks as the | |
56 | original url changes so comparison is impossible. | |
1 | 57 | |
2 | 58 | 2.01 Wed Dec 5 11:40:28 CET 2007 |
3 | 59 | - INCOMPATIBLE API CHANGE: to_json and from_json have been |
215 | 271 | (non-unicode) codepoint is encountered. |
216 | 272 | |
217 | 273 | 0.2 Fri Mar 23 00:23:34 CET 2007 |
218 | - the "could not sleep without debuggign release". | |
274 | - the "could not sleep without debugging release". | |
219 | 275 | it should basically work now, with many bugs as |
220 | 276 | no production tests have been run yet. |
221 | 277 | - added more testcases. |
5 | 5 | XS.pm |
6 | 6 | XS.xs |
7 | 7 | XS/Boolean.pm |
8 | bin/json_xs | |
8 | 9 | eg/bench |
9 | 10 | t/00_load.t |
10 | 11 | t/01_utf8.t |
25 | 26 | t/16_tied.t |
26 | 27 | t/17_relaxed.t |
27 | 28 | t/18_json_checker.t |
29 | t/19_incr.t | |
28 | 30 | t/99_binary.t |
29 | 31 | typemap |
30 | 32 | META.yml Module meta-data (added by MakeMaker) |
0 | 0 | --- #YAML:1.0 |
1 | 1 | name: JSON-XS |
2 | version: 2.01 | |
2 | version: 2.23 | |
3 | 3 | abstract: ~ |
4 | 4 | license: ~ |
5 | generated_by: ExtUtils::MakeMaker version 6.32 | |
5 | author: ~ | |
6 | generated_by: ExtUtils::MakeMaker version 6.44 | |
6 | 7 | distribution_type: module |
7 | 8 | requires: |
8 | 9 | meta-spec: |
9 | url: http://module-build.sourceforge.net/META-spec-v1.2.html | |
10 | version: 1.2 | |
10 | url: http://module-build.sourceforge.net/META-spec-v1.3.html | |
11 | version: 1.3 |
1 | 1 | use ExtUtils::MakeMaker; |
2 | 2 | |
3 | 3 | WriteMakefile( |
4 | dist => { | |
5 | PREOP => 'pod2text XS.pm | tee README >$(DISTVNAME)/README; chmod -R u=rwX,go=rX . ;', | |
6 | COMPRESS => 'gzip -9v', | |
7 | SUFFIX => '.gz', | |
8 | }, | |
9 | NAME => "JSON::XS", | |
4 | dist => { | |
5 | PREOP => 'pod2text XS.pm | tee README >$(DISTVNAME)/README; chmod -R u=rwX,go=rX . ;', | |
6 | COMPRESS => 'gzip -9v', | |
7 | SUFFIX => '.gz', | |
8 | }, | |
9 | EXE_FILES => [ "bin/json_xs" ], | |
10 | 10 | VERSION_FROM => "XS.pm", |
11 | NAME => "JSON::XS", | |
11 | 12 | ); |
12 | 13 |
0 | 0 | NAME |
1 | 1 | JSON::XS - JSON serialising/deserialising, done correctly and fast |
2 | 2 | |
3 | JSON::XS - 正しくて高速な JSON | |
4 | シリアライザ/デシリアライザ | |
3 | JSON::XS - 正しくて高速な JSON シリアライザ/デシリアライザ | |
5 | 4 | (http://fleur.hio.jp/perldoc/mix/lib/JSON/XS.html) |
6 | 5 | |
7 | 6 | SYNOPSIS |
22 | 21 | # Note that JSON version 2.0 and above will automatically use JSON::XS |
23 | 22 | # if available, at virtually no speed overhead either, so you should |
24 | 23 | # be able to just: |
25 | ||
26 | use JSON; | |
24 | ||
25 | use JSON; | |
27 | 26 | |
28 | 27 | # and do the same things, except that you have a pure-perl fallback now. |
29 | 28 | |
34 | 33 | |
35 | 34 | Beginning with version 2.0 of the JSON module, when both JSON and |
36 | 35 | JSON::XS are installed, then JSON will fall back on JSON::XS (this can |
37 | be overriden) with no overhead due to emulation (by inheritign | |
36 | be overridden) with no overhead due to emulation (by inheriting | |
38 | 37 | constructor and methods). If JSON::XS is not available, it will fall |
39 | 38 | back to the compatible JSON::PP module as backend, so using JSON instead |
40 | 39 | of JSON::XS gives you a portable JSON API that can be fast when you need |
46 | 45 | cases their maintainers are unresponsive, gone missing, or not listening |
47 | 46 | to bug reports for other reasons. |
48 | 47 | |
49 | See COMPARISON, below, for a comparison to some other JSON modules. | |
50 | ||
51 | 48 | See MAPPING, below, on how JSON::XS maps perl values to JSON values and |
52 | 49 | vice versa. |
53 | 50 | |
54 | 51 | FEATURES |
55 | * correct Unicode handling | |
56 | This module knows how to handle Unicode, and even documents how and | |
57 | when it does so. | |
58 | ||
59 | * round-trip integrity | |
60 | When you serialise a perl data structure using only datatypes | |
52 | * correct Unicode handling | |
53 | ||
54 | This module knows how to handle Unicode, documents how and when it | |
55 | does so, and even documents what "correct" means. | |
56 | ||
57 | * round-trip integrity | |
58 | ||
59 | When you serialise a perl data structure using only data types | |
61 | 60 | supported by JSON, the deserialised data structure is identical on |
62 | 61 | the Perl level. (e.g. the string "2.0" doesn't suddenly become "2" |
63 | just because it looks like a number). | |
64 | ||
65 | * strict checking of JSON correctness | |
62 | just because it looks like a number). There *are* minor exceptions | |
63 | to this, read the MAPPING section below to learn about those. | |
64 | ||
65 | * strict checking of JSON correctness | |
66 | ||
66 | 67 | There is no guessing, no generating of illegal JSON texts by |
67 | 68 | default, and only JSON is accepted as input by default (the latter |
68 | 69 | is a security feature). |
69 | 70 | |
70 | * fast | |
71 | Compared to other JSON modules, this module compares favourably in | |
72 | terms of speed, too. | |
73 | ||
74 | * simple to use | |
75 | This module has both a simple functional interface as well as an OO | |
76 | interface. | |
77 | ||
78 | * reasonably versatile output formats | |
79 | You can choose between the most compact guaranteed single-line | |
80 | format possible (nice for simple line-based protocols), a pure-ascii | |
71 | * fast | |
72 | ||
73 | Compared to other JSON modules and other serialisers such as | |
74 | Storable, this module usually compares favourably in terms of speed, | |
75 | too. | |
76 | ||
77 | * simple to use | |
78 | ||
79 | This module has both a simple functional interface as well as an | |
80 | object oriented interface. | |
81 | ||
82 | * reasonably versatile output formats | |
83 | ||
84 | You can choose between the most compact guaranteed-single-line | |
85 | format possible (nice for simple line-based protocols), a pure-ASCII | |
81 | 86 | format (for when your transport is not 8-bit clean, still supports |
82 | 87 | the whole Unicode range), or a pretty-printed format (for when you |
83 | 88 | want to read that stuff). Or you can combine those features in |
95 | 100 | |
96 | 101 | $json_text = JSON::XS->new->utf8->encode ($perl_scalar) |
97 | 102 | |
98 | except being faster. | |
103 | Except being faster. | |
99 | 104 | |
100 | 105 | $perl_scalar = decode_json $json_text |
101 | 106 | The opposite of "encode_json": expects an UTF-8 (binary) string and |
106 | 111 | |
107 | 112 | $perl_scalar = JSON::XS->new->utf8->decode ($json_text) |
108 | 113 | |
109 | except being faster. | |
114 | Except being faster. | |
110 | 115 | |
111 | 116 | $is_boolean = JSON::XS::is_bool $scalar |
112 | 117 | Returns true if the passed scalar represents either JSON::XS::true |
126 | 131 | a Perl string - very natural. |
127 | 132 | |
128 | 133 | 2. Perl does *not* associate an encoding with your strings. |
129 | Unless you force it to, e.g. when matching it against a regex, or | |
134 | ... until you force it to, e.g. when matching it against a regex, or | |
130 | 135 | printing the scalar to a file, in which case Perl either interprets |
131 | 136 | your string as locale-encoded text, octets/binary, or as Unicode, |
132 | 137 | depending on various settings. In no case is an encoding stored |
133 | 138 | together with your data, it is *use* that decides encoding, not any |
134 | magical metadata. | |
139 | magical meta data. | |
135 | 140 | |
136 | 141 | 3. The internal utf-8 flag has no meaning with regards to the encoding |
137 | 142 | of your string. |
146 | 151 | doesn't exist. |
147 | 152 | |
148 | 153 | 4. A "Unicode String" is simply a string where each character can be |
149 | validly interpreted as a Unicode codepoint. | |
154 | validly interpreted as a Unicode code point. | |
150 | 155 | If you have UTF-8 encoded data, it is no longer a Unicode string, |
151 | 156 | but a Unicode string encoded in UTF-8, giving you a binary string. |
152 | 157 | |
186 | 191 | Unicode characters unless required by the JSON syntax or other |
187 | 192 | flags. This results in a faster and more compact format. |
188 | 193 | |
194 | See also the section *ENCODING/CODESET FLAG NOTES* later in this | |
195 | document. | |
196 | ||
189 | 197 | The main use for this flag is to produce JSON texts that can be |
190 | 198 | transmitted over a 7-bit channel, as the encoded JSON texts will not |
191 | 199 | contain any 8 bit characters. |
206 | 214 | If $enable is false, then the "encode" method will not escape |
207 | 215 | Unicode characters unless required by the JSON syntax or other |
208 | 216 | flags. |
217 | ||
218 | See also the section *ENCODING/CODESET FLAG NOTES* later in this | |
219 | document. | |
209 | 220 | |
210 | 221 | The main use for this flag is efficiently encoding binary data as |
211 | 222 | JSON text, as most octets will not be escaped, resulting in a |
235 | 246 | thus a Unicode string. Any decoding or encoding (e.g. to UTF-8 or |
236 | 247 | UTF-16) needs to be done yourself, e.g. using the Encode module. |
237 | 248 | |
249 | See also the section *ENCODING/CODESET FLAG NOTES* later in this | |
250 | document. | |
251 | ||
238 | 252 | Example, output UTF-16BE-encoded JSON: |
239 | 253 | |
240 | 254 | use Encode; |
319 | 333 | |
320 | 334 | Currently accepted extensions are: |
321 | 335 | |
322 | * list items can have an end-comma | |
336 | * list items can have an end-comma | |
337 | ||
323 | 338 | JSON *separates* array elements and key-value pairs with commas. |
324 | 339 | This can be annoying if you write JSON texts manually and want |
325 | 340 | to be able to quickly append elements, so this extension accepts |
334 | 349 | "k2": "v2", <- this comma not normally allowed |
335 | 350 | } |
336 | 351 | |
337 | * shell-style '#'-comments | |
352 | * shell-style '#'-comments | |
353 | ||
338 | 354 | Whenever JSON allows whitespace, shell-style comments are |
339 | 355 | additionally allowed. They are terminated by the first |
340 | 356 | carriage-return or line-feed character, after which more |
380 | 396 | |
381 | 397 | JSON::XS->new->allow_nonref->encode ("Hello, World!") |
382 | 398 | => "Hello, World!" |
399 | ||
400 | $json = $json->allow_unknown ([$enable]) | |
401 | $enabled = $json->get_allow_unknown | |
402 | If $enable is true (or missing), then "encode" will *not* throw an | |
403 | exception when it encounters values it cannot represent in JSON (for | |
404 | example, filehandles) but instead will encode a JSON "null" value. | |
405 | Note that blessed objects are not included here and are handled | |
406 | separately by "allow_nonref". | |
407 | ||
408 | If $enable is false (the default), then "encode" will throw an | |
409 | exception when it encounters anything it cannot encode as JSON. | |
410 | ||
411 | This option does not affect "decode" in any way, and it is | |
412 | recommended to leave it off unless you know your communications | |
413 | partner. | |
383 | 414 | |
384 | 415 | $json = $json->allow_blessed ([$enable]) |
385 | 416 | $enabled = $json->get_allow_blessed |
524 | 555 | $json = $json->max_depth ([$maximum_nesting_depth]) |
525 | 556 | $max_depth = $json->get_max_depth |
526 | 557 | Sets the maximum nesting level (default 512) accepted while encoding |
527 | or decoding. If the JSON text or Perl data structure has an equal or | |
528 | higher nesting level then this limit, then the encoder and decoder | |
529 | will stop and croak at that point. | |
558 | or decoding. If a higher nesting level is detected in JSON text or a | |
559 | Perl data structure, then the encoder and decoder will stop and | |
560 | croak at that point. | |
530 | 561 | |
531 | 562 | Nesting level is defined by number of hash- or arrayrefs that the |
532 | 563 | encoder needs to traverse to reach a given point or the number of |
536 | 567 | Setting the maximum depth to one disallows any nesting, so that |
537 | 568 | ensures that the object is only a single hash/object or array. |
538 | 569 | |
539 | The argument to "max_depth" will be rounded up to the next highest | |
540 | power of two. If no argument is given, the highest possible setting | |
541 | will be used, which is rarely useful. | |
570 | If no argument is given, the highest possible setting will be used, | |
571 | which is rarely useful. | |
572 | ||
573 | Note that nesting is implemented by recursion in C. The default | |
574 | value has been chosen to be as large as typical operating systems | |
575 | allow without crashing. | |
542 | 576 | |
543 | 577 | See SECURITY CONSIDERATIONS, below, for more info on why this is |
544 | 578 | useful. |
547 | 581 | $max_size = $json->get_max_size |
548 | 582 | Set the maximum length a JSON text may have (in bytes) where |
549 | 583 | decoding is being attempted. The default is 0, meaning no limit. |
550 | When "decode" is called on a string longer then this number of | |
551 | characters it will not attempt to decode the string but throw an | |
584 | When "decode" is called on a string that is longer than this many | |
585 | bytes, it will not attempt to decode the string but throw an | |
552 | 586 | exception. This setting has no effect on "encode" (yet). |
553 | 587 | |
554 | The argument to "max_size" will be rounded up to the next highest | |
555 | power of two (so may be more than requested). If no argument is | |
556 | given, the limit check will be deactivated (same as when 0 is | |
557 | specified). | |
588 | If no argument is given, the limit check will be deactivated (same | |
589 | as when 0 is specified). | |
558 | 590 | |
559 | 591 | See SECURITY CONSIDERATIONS, below, for more info on why this is |
560 | 592 | useful. |
589 | 621 | JSON::XS->new->decode_prefix ("[1] the tail") |
590 | 622 | => ([], 3) |
591 | 623 | |
624 | INCREMENTAL PARSING | |
625 | In some cases, there is the need for incremental parsing of JSON texts. | |
626 | While this module always has to keep both JSON text and resulting Perl | |
627 | data structure in memory at one time, it does allow you to parse a JSON | |
628 | stream incrementally. It does so by accumulating text until it has a | |
629 | full JSON object, which it then can decode. This process is similar to | |
630 | using "decode_prefix" to see if a full JSON object is available, but is | |
631 | much more efficient (and can be implemented with a minimum of method | |
632 | calls). | |
633 | ||
634 | JSON::XS will only attempt to parse the JSON text once it is sure it has | |
635 | enough text to get a decisive result, using a very simple but truly | |
636 | incremental parser. This means that it sometimes won't stop as early as | |
637 | the full parser, for example, it doesn't detect mismatched parentheses. | |
638 | The only thing it guarantees is that it starts decoding as soon as a | |
639 | syntactically valid JSON text has been seen. This means you need to set | |
640 | resource limits (e.g. "max_size") to ensure the parser will stop parsing | |
641 | in the presence of syntax errors. | |
642 | ||
643 | The following methods implement this incremental parser. | |
644 | ||
645 | [void, scalar or list context] = $json->incr_parse ([$string]) | |
646 | This is the central parsing function. It can both append new text | |
647 | and extract objects from the stream accumulated so far (both of | |
648 | these functions are optional). | |
649 | ||
650 | If $string is given, then this string is appended to the already | |
651 | existing JSON fragment stored in the $json object. | |
652 | ||
653 | After that, if the function is called in void context, it will | |
654 | simply return without doing anything further. This can be used to | |
655 | add more text in as many chunks as you want. | |
656 | ||
657 | If the method is called in scalar context, then it will try to | |
658 | extract exactly *one* JSON object. If that is successful, it will | |
659 | return this object, otherwise it will return "undef". If there is a | |
660 | parse error, this method will croak just as "decode" would do (one | |
661 | can then use "incr_skip" to skip the erroneous part). This is the | |
662 | most common way of using the method. | |
663 | ||
664 | And finally, in list context, it will try to extract as many objects | |
665 | from the stream as it can find and return them, or the empty list | |
666 | otherwise. For this to work, there must be no separators between the | |
667 | JSON objects or arrays, instead they must be concatenated | |
668 | back-to-back. If an error occurs, an exception will be raised as in | |
669 | the scalar context case. Note that in this case, any | |
670 | previously-parsed JSON texts will be lost. | |
671 | ||
672 | $lvalue_string = $json->incr_text | |
673 | This method returns the currently stored JSON fragment as an lvalue, | |
674 | that is, you can manipulate it. This *only* works when a preceding | |
675 | call to "incr_parse" in *scalar context* successfully returned an | |
676 | object. Under all other circumstances you must not call this | |
677 | function (I mean it. Although in simple tests it might actually | |
678 | work, it *will* fail under real world conditions). As a special | |
679 | exception, you can also call this method before having parsed | |
680 | anything. | |
681 | ||
682 | This function is useful in two cases: a) finding the trailing text | |
683 | after a JSON object or b) parsing multiple JSON objects separated by | |
684 | non-JSON text (such as commas). | |
685 | ||
686 | $json->incr_skip | |
687 | This will reset the state of the incremental parser and will remove | |
688 | the parsed text from the input buffer. This is useful after | |
689 | "incr_parse" died, in which case the input buffer and incremental | |
690 | parser state is left unchanged, to skip the text parsed so far and | |
691 | to reset the parse state. | |
692 | ||
693 | $json->incr_reset | |
694 | This completely resets the incremental parser, that is, after this | |
695 | call, it will be as if the parser had never parsed anything. | |
696 | ||
697 | This is useful if you want to repeatedly parse JSON objects and want | |
698 | to ignore any trailing data, which means you have to reset the | |
699 | parser after each successful decode. | |
700 | ||
701 | LIMITATIONS | |
702 | All options that affect decoding are supported, except "allow_nonref". | |
703 | The reason for this is that it cannot be made to work sensibly: JSON | |
704 | objects and arrays are self-delimited, i.e. you can concatenate them | |
705 | back to back and still decode them perfectly. This does not hold true | |
706 | for JSON numbers, however. | |
707 | ||
708 | For example, is the string 1 a single JSON number, or is it simply the | |
709 | start of 12? Or is 12 a single JSON number, or the concatenation of 1 | |
710 | and 2? In neither case can you tell, and this is why JSON::XS takes the | |
711 | conservative route and disallows this case. | |
712 | ||
713 | EXAMPLES | |
714 | Some examples will make all this clearer. First, a simple example that | |
715 | works similarly to "decode_prefix": We want to decode the JSON object at | |
716 | the start of a string and identify the portion after the JSON object: | |
717 | ||
718 | my $text = "[1,2,3] hello"; | |
719 | ||
720 | my $json = new JSON::XS; | |
721 | ||
722 | my $obj = $json->incr_parse ($text) | |
723 | or die "expected JSON object or array at beginning of string"; | |
724 | ||
725 | my $tail = $json->incr_text; | |
726 | # $tail now contains " hello" | |
727 | ||
728 | Easy, isn't it? | |
729 | ||
730 | Now for a more complicated example: Imagine a hypothetical protocol | |
731 | where you read some requests from a TCP stream, and each request is a | |
732 | JSON array, without any separation between them (in fact, it is often | |
733 | useful to use newlines as "separators", as these get interpreted as | |
734 | whitespace at the start of the JSON text, which makes it possible to | |
735 | test said protocol with "telnet"...). | |
736 | ||
737 | Here is how you'd do it (it is trivial to write this in an event-based | |
738 | manner): | |
739 | ||
740 | my $json = new JSON::XS; | |
741 | ||
742 | # read some data from the socket | |
743 | while (sysread $socket, my $buf, 4096) { | |
744 | ||
745 | # split and decode as many requests as possible | |
746 | for my $request ($json->incr_parse ($buf)) { | |
747 | # act on the $request | |
748 | } | |
749 | } | |
750 | ||
751 | Another complicated example: Assume you have a string with JSON objects | |
752 | or arrays, all separated by (optional) comma characters (e.g. "[1],[2], | |
753 | [3]"). To parse them, we have to skip the commas between the JSON texts, | |
754 | and here is where the lvalue-ness of "incr_text" comes in useful: | |
755 | ||
756 | my $text = "[1],[2], [3]"; | |
757 | my $json = new JSON::XS; | |
758 | ||
759 | # void context, so no parsing done | |
760 | $json->incr_parse ($text); | |
761 | ||
762 | # now extract as many objects as possible. note the | |
763 | # use of scalar context so incr_text can be called. | |
764 | while (my $obj = $json->incr_parse) { | |
765 | # do something with $obj | |
766 | ||
767 | # now skip the optional comma | |
768 | $json->incr_text =~ s/^ \s* , //x; | |
769 | } | |
770 | ||
771 | Now lets go for a very complex example: Assume that you have a gigantic | |
772 | JSON array-of-objects, many gigabytes in size, and you want to parse it, | |
773 | but you cannot load it into memory fully (this has actually happened in | |
774 | the real world :). | |
775 | ||
776 | Well, you lost, you have to implement your own JSON parser. But JSON::XS | |
777 | can still help you: You implement a (very simple) array parser and let | |
778 | JSON decode the array elements, which are all full JSON objects on their | |
779 | own (this wouldn't work if the array elements could be JSON numbers, for | |
780 | example): | |
781 | ||
782 | my $json = new JSON::XS; | |
783 | ||
784 | # open the monster | |
785 | open my $fh, "<bigfile.json" | |
786 | or die "bigfile: $!"; | |
787 | ||
788 | # first parse the initial "[" | |
789 | for (;;) { | |
790 | sysread $fh, my $buf, 65536 | |
791 | or die "read error: $!"; | |
792 | $json->incr_parse ($buf); # void context, so no parsing | |
793 | ||
794 | # Exit the loop once we found and removed(!) the initial "[". | |
795 | # In essence, we are (ab-)using the $json object as a simple scalar | |
796 | # we append data to. | |
797 | last if $json->incr_text =~ s/^ \s* \[ //x; | |
798 | } | |
799 | ||
800 | # now we have skipped the initial "[", so continue | |
801 | # parsing all the elements. | |
802 | for (;;) { | |
803 | # in this loop we read data until we got a single JSON object | |
804 | for (;;) { | |
805 | if (my $obj = $json->incr_parse) { | |
806 | # do something with $obj | |
807 | last; | |
808 | } | |
809 | ||
810 | # add more data | |
811 | sysread $fh, my $buf, 65536 | |
812 | or die "read error: $!"; | |
813 | $json->incr_parse ($buf); # void context, so no parsing | |
814 | } | |
815 | ||
816 | # in this loop we read data until we either found and parsed the | |
817 | # separating "," between elements, or the final "]" | |
818 | for (;;) { | |
819 | # first skip whitespace | |
820 | $json->incr_text =~ s/^\s*//; | |
821 | ||
822 | # if we find "]", we are done | |
823 | if ($json->incr_text =~ s/^\]//) { | |
824 | print "finished.\n"; | |
825 | exit; | |
826 | } | |
827 | ||
828 | # if we find ",", we can continue with the next element | |
829 | if ($json->incr_text =~ s/^,//) { | |
830 | last; | |
831 | } | |
832 | ||
833 | # if we find anything else, we have a parse error! | |
834 | if (length $json->incr_text) { | |
835 | die "parse error near ", $json->incr_text; | |
836 | } | |
837 | ||
838 | # else add more data | |
839 | sysread $fh, my $buf, 65536 | |
840 | or die "read error: $!"; | |
841 | $json->incr_parse ($buf); # void context, so no parsing | |
842 | } | |
843 | ||
844 | This is a complex example, but most of the complexity comes from the | |
845 | fact that we are trying to be correct (bear with me if I am wrong, I | |
846 | never ran the above example :). | |
847 | ||
592 | 848 | MAPPING |
593 | 849 | This section describes how JSON::XS maps Perl values to JSON values and |
594 | 850 | vice versa. These mappings are designed to "do the right thing" in most |
619 | 875 | parts. On the Perl level, there is no difference between those as |
620 | 876 | Perl handles all the conversion details, but an integer may take |
621 | 877 | slightly less memory and might represent more values exactly than |
622 | (floating point) numbers. | |
878 | floating point numbers. | |
623 | 879 | |
624 | 880 | If the number consists of digits only, JSON::XS will try to |
625 | 881 | represent it as an integer value. If that fails, it will try to |
626 | 882 | represent it as a numeric (floating point) value if that is possible |
627 | 883 | without loss of precision. Otherwise it will preserve the number as |
628 | a string value. | |
884 | a string value (in which case you lose roundtripping ability, as the | |
885 | JSON number will be re-encoded to a JSON string). | |
629 | 886 | |
630 | 887 | Numbers containing a fractional or exponential part will always be |
631 | 888 | represented as numeric (floating point) values, possibly at a loss |
632 | of precision. | |
633 | ||
634 | This might create round-tripping problems as numbers might become | |
635 | strings, but as Perl is typeless there is no other way to do it. | |
889 | of precision (in which case you might lose perfect roundtripping | |
890 | ability, but the JSON number will still be re-encoded as a JSON | |
891 | number). | |
636 | 892 | |
637 | 893 | true, false |
638 | 894 | These JSON atoms become "JSON::XS::true" and "JSON::XS::false", |
670 | 926 | can also use "JSON::XS::false" and "JSON::XS::true" to improve |
671 | 927 | readability. |
672 | 928 | |
673 | encode_json [\0,JSON::XS::true] # yields [false,true] | |
929 | encode_json [\0, JSON::XS::true] # yields [false,true] | |
674 | 930 | |
675 | 931 | JSON::XS::true, JSON::XS::false |
676 | 932 | These special values become JSON true and JSON false values, |
677 | 933 | respectively. You can also use "\1" and "\0" directly if you want. |
678 | 934 | |
679 | 935 | blessed objects |
680 | Blessed objects are not allowed. JSON::XS currently tries to encode | |
681 | their underlying representation (hash- or arrayref), but this | |
682 | behaviour might change in future versions. | |
936 | Blessed objects are not directly representable in JSON. See the | |
937 | "allow_blessed" and "convert_blessed" methods on various options on | |
938 | how to deal with this: basically, you can choose between throwing an | |
939 | exception, encoding the reference as if it weren't blessed, or | |
940 | provide your own serialiser method. | |
683 | 941 | |
684 | 942 | simple scalars |
685 | 943 | Simple Perl scalars (any scalar that is not a reference) are the |
686 | 944 | most difficult objects to encode: JSON::XS will encode undefined |
687 | scalars as JSON null value, scalars that have last been used in a | |
688 | string context before encoding as JSON strings and anything else as | |
945 | scalars as JSON "null" values, scalars that have last been used in a | |
946 | string context before encoding as JSON strings, and anything else as | |
689 | 947 | number value: |
690 | 948 | |
691 | 949 | # dump as number |
714 | 972 | $x *= 1; # same thing, the choice is yours. |
715 | 973 | |
716 | 974 | You can not currently force the type in other, less obscure, ways. |
717 | Tell me if you need this capability. | |
718 | ||
719 | COMPARISON | |
720 | As already mentioned, this module was created because none of the | |
721 | existing JSON modules could be made to work correctly. First I will | |
722 | describe the problems (or pleasures) I encountered with various existing | |
723 | JSON modules, followed by some benchmark values. JSON::XS was designed | |
724 | not to suffer from any of these problems or limitations. | |
725 | ||
726 | JSON 1.07 | |
727 | Slow (but very portable, as it is written in pure Perl). | |
728 | ||
729 | Undocumented/buggy Unicode handling (how JSON handles Unicode values | |
730 | is undocumented. One can get far by feeding it Unicode strings and | |
731 | doing en-/decoding oneself, but Unicode escapes are not working | |
732 | properly). | |
733 | ||
734 | No round-tripping (strings get clobbered if they look like numbers, | |
735 | e.g. the string 2.0 will encode to 2.0 instead of "2.0", and that | |
736 | will decode into the number 2). | |
737 | ||
738 | JSON::PC 0.01 | |
739 | Very fast. | |
740 | ||
741 | Undocumented/buggy Unicode handling. | |
742 | ||
743 | No round-tripping. | |
744 | ||
745 | Has problems handling many Perl values (e.g. regex results and other | |
746 | magic values will make it croak). | |
747 | ||
748 | Does not even generate valid JSON ("{1,2}" gets converted to "{1:2}" | |
749 | which is not a valid JSON text). | 
750 | ||
751 | Unmaintained (maintainer unresponsive for many months, bugs are not | |
752 | getting fixed). | |
753 | ||
754 | JSON::Syck 0.21 | |
755 | Very buggy (often crashes). | |
756 | ||
757 | Very inflexible (no human-readable format supported, format pretty | |
758 | much undocumented. I need at least a format for easy reading by | |
759 | humans and a single-line compact format for use in a protocol, and | |
760 | preferably a way to generate ASCII-only JSON texts). | |
761 | ||
762 | Completely broken (and confusingly documented) Unicode handling | |
763 | (Unicode escapes are not working properly, you need to set | |
764 | ImplicitUnicode to *different* values on en- and decoding to get | |
765 | symmetric behaviour). | |
766 | ||
767 | No round-tripping (simple cases work, but this depends on whether | |
768 | the scalar value was used in a numeric context or not). | |
769 | ||
770 | Dumping hashes may skip hash values depending on iterator state. | |
771 | ||
772 | Unmaintained (maintainer unresponsive for many months, bugs are not | |
773 | getting fixed). | |
774 | ||
775 | Does not check input for validity (i.e. will accept non-JSON input | |
776 | and return "something" instead of raising an exception. This is a | |
777 | security issue: imagine two banks transferring money between each | |
778 | other using JSON. One bank might parse a given non-JSON request and | |
779 | deduct money, while the other might reject the transaction with a | |
780 | syntax error. While a good protocol will at least recover, that is | |
781 | extra unnecessary work and the transaction will still not succeed). | |
782 | ||
783 | JSON::DWIW 0.04 | |
784 | Very fast. Very natural. Very nice. | |
785 | ||
786 | Undocumented Unicode handling (but the best of the pack. Unicode | |
787 | escapes still don't get parsed properly). | |
788 | ||
789 | Very inflexible. | |
790 | ||
791 | No round-tripping. | |
792 | ||
793 | Does not generate valid JSON texts (key strings are often unquoted, | |
794 | empty keys result in nothing being output) | |
795 | ||
796 | Does not check input for validity. | |
975 | Tell me if you need this capability (but don't forget to explain why | |
976 | it's needed :). | |
977 | ||
978 | ENCODING/CODESET FLAG NOTES | |
979 | The interested reader might have seen a number of flags that signify | |
980 | encodings or codesets - "utf8", "latin1" and "ascii". There seems to be | |
981 | some confusion on what these do, so here is a short comparison: | |
982 | ||
983 | "utf8" controls whether the JSON text created by "encode" (and expected | |
984 | by "decode") is UTF-8 encoded or not, while "latin1" and "ascii" only | |
985 | control whether "encode" escapes character values outside their | |
986 | respective codeset range. Neither of these flags conflict with each | |
987 | other, although some combinations make less sense than others. | |
988 | ||
989 | Care has been taken to make all flags symmetrical with respect to | |
990 | "encode" and "decode", that is, texts encoded with any combination of | |
991 | these flag values will be correctly decoded when the same flags are used | |
992 | - in general, if you use different flag settings while encoding vs. when | |
993 | decoding you likely have a bug somewhere. | |
994 | ||
995 | Below comes a verbose discussion of these flags. Note that a "codeset" | |
996 | is simply an abstract set of character-codepoint pairs, while an | |
997 | encoding takes those codepoint numbers and *encodes* them, in our case | |
998 | into octets. Unicode is (among other things) a codeset, UTF-8 is an | |
999 | encoding, and ISO-8859-1 (= latin 1) and ASCII are both codesets *and* | |
1000 | encodings at the same time, which can be confusing. | |
1001 | ||
1002 | "utf8" flag disabled | |
1003 | When "utf8" is disabled (the default), then "encode"/"decode" | |
1004 | generate and expect Unicode strings, that is, characters with high | |
1005 | ordinal Unicode values (> 255) will be encoded as such characters, | |
1006 | and likewise such characters are decoded as-is, no changes to them | 
1007 | will be done, except "(re-)interpreting" them as Unicode codepoints | |
1008 | or Unicode characters, respectively (to Perl, these are the same | |
1009 | thing in strings unless you do funny/weird/dumb stuff). | |
1010 | ||
1011 | This is useful when you want to do the encoding yourself (e.g. when | |
1012 | you want to have UTF-16 encoded JSON texts) or when some other layer | |
1013 | does the encoding for you (for example, when printing to a terminal | |
1014 | using a filehandle that transparently encodes to UTF-8 you certainly | |
1015 | do NOT want to UTF-8 encode your data first and have Perl encode it | |
1016 | another time). | |
1017 | ||
1018 | "utf8" flag enabled | |
1019 | If the "utf8"-flag is enabled, "encode"/"decode" will encode all | |
1020 | characters using the corresponding UTF-8 multi-byte sequence, and | |
1021 | will expect your input strings to be encoded as UTF-8, that is, no | |
1022 | "character" of the input string must have any value > 255, as UTF-8 | |
1023 | does not allow that. | |
1024 | ||
1025 | The "utf8" flag therefore switches between two modes: disabled means | |
1026 | you will get a Unicode string in Perl, enabled means you get an | |
1027 | UTF-8 encoded octet/binary string in Perl. | |
1028 | ||
1029 | "latin1" or "ascii" flags enabled | |
1030 | With "latin1" (or "ascii") enabled, "encode" will escape characters | |
1031 | with ordinal values > 255 (> 127 with "ascii") and encode the | |
1032 | remaining characters as specified by the "utf8" flag. | |
1033 | ||
1034 | If "utf8" is disabled, then the result is also correctly encoded in | |
1035 | those character sets (as both are proper subsets of Unicode, meaning | |
1036 | that a Unicode string with all character values < 256 is the same | |
1037 | thing as a ISO-8859-1 string, and a Unicode string with all | |
1038 | character values < 128 is the same thing as an ASCII string in | |
1039 | Perl). | |
1040 | ||
1041 | If "utf8" is enabled, you still get a correct UTF-8-encoded string, | |
1042 | regardless of these flags, just some more characters will be escaped | |
1043 | using "\uXXXX" than before. | 
1044 | ||
1045 | Note that ISO-8859-1-*encoded* strings are not compatible with UTF-8 | |
1046 | encoding, while ASCII-encoded strings are. That is because the | |
1047 | ISO-8859-1 encoding is NOT a subset of UTF-8 (despite the ISO-8859-1 | |
1048 | *codeset* being a subset of Unicode), while ASCII is. | |
1049 | ||
1050 | Surprisingly, "decode" will ignore these flags and so treat all | |
1051 | input values as governed by the "utf8" flag. If it is disabled, this | |
1052 | allows you to decode ISO-8859-1- and ASCII-encoded strings, as both | |
1053 | strict subsets of Unicode. If it is enabled, you can correctly | |
1054 | decode UTF-8 encoded strings. | |
1055 | ||
1056 | So neither "latin1" nor "ascii" are incompatible with the "utf8" | |
1057 | flag - they only govern when the JSON output engine escapes a | |
1058 | character or not. | |
1059 | ||
1060 | The main use for "latin1" is to relatively efficiently store binary | |
1061 | data as JSON, at the expense of breaking compatibility with most | |
1062 | JSON decoders. | |
1063 | ||
1064 | The main use for "ascii" is to force the output to not contain | |
1065 | characters with values > 127, which means you can interpret the | |
1066 | resulting string as UTF-8, ISO-8859-1, ASCII, KOI8-R or most about | |
1067 | any character set and 8-bit-encoding, and still get the same data | |
1068 | structure back. This is useful when your channel for JSON transfer | |
1069 | is not 8-bit clean or the encoding might be mangled in between (e.g. | |
1070 | in mail), and works because ASCII is a proper subset of most 8-bit | |
1071 | and multibyte encodings in use in the world. | |
797 | 1072 | |
798 | 1073 | JSON and YAML |
799 | You often hear that JSON is a subset (or a close subset) of YAML. This | |
800 | is, however, a mass hysteria and very far from the truth. In general, | |
801 | there is no way to configure JSON::XS to output a data structure as | |
802 | valid YAML. | |
1074 | You often hear that JSON is a subset of YAML. This is, however, a mass | |
1075 | hysteria(*) and very far from the truth (as of the time of this | |
1076 | writing), so let me state it clearly: *in general, there is no way to | |
1077 | configure JSON::XS to output a data structure as valid YAML* that works | |
1078 | in all cases. | |
803 | 1079 | |
804 | 1080 | If you really must use JSON::XS to generate YAML, you should use this |
805 | 1081 | algorithm (subject to change in future versions): |
807 | 1083 | my $to_yaml = JSON::XS->new->utf8->space_after (1); |
808 | 1084 | my $yaml = $to_yaml->encode ($ref) . "\n"; |
809 | 1085 | |
810 | This will usually generate JSON texts that also parse as valid YAML. | |
1086 | This will *usually* generate JSON texts that also parse as valid YAML. | |
811 | 1087 | Please note that YAML has hardcoded limits on (simple) object key |
812 | lengths that JSON doesn't have, so you should make sure that your hash | |
813 | keys are noticeably shorter than the 1024 characters YAML allows. | |
814 | ||
815 | There might be other incompatibilities that I am not aware of. In | |
816 | general you should not try to generate YAML with a JSON generator or | |
1088 | lengths that JSON doesn't have and also has different and incompatible | |
1089 | unicode handling, so you should make sure that your hash keys are | |
1090 | noticeably shorter than the 1024 "stream characters" YAML allows and | |
1091 | that you do not have characters with codepoint values outside the | |
1092 | Unicode BMP (basic multilingual plane). YAML also does not allow "\/" | 
1093 | sequences in strings (which JSON::XS does not *currently* generate, but | |
1094 | other JSON generators might). | |
1095 | ||
1096 | There might be other incompatibilities that I am not aware of (or the | |
1097 | YAML specification has been changed yet again - it does so quite often). | |
1098 | In general you should not try to generate YAML with a JSON generator or | |
817 | 1099 | vice versa, or try to parse JSON with a YAML parser or vice versa: |
818 | chances are high that you will run into severe interoperability | |
819 | problems. | |
1100 | chances are high that you will run into severe interoperability problems | |
1101 | when you least expect it. | |
1102 | ||
1103 | (*) I have been pressured multiple times by Brian Ingerson (one of the | |
1104 | authors of the YAML specification) to remove this paragraph, despite | |
1105 | him acknowledging that the actual incompatibilities exist. As I was | |
1106 | personally bitten by this "JSON is YAML" lie, I refused and said I | |
1107 | will continue to educate people about these issues, so others do not | |
1108 | run into the same problem again and again. After this, Brian called | |
1109 | me a (quote)*complete and worthless idiot*(unquote). | |
1110 | ||
1111 | In my opinion, instead of pressuring and insulting people who | |
1112 | actually clarify issues with YAML and the wrong statements of some | |
1113 | of its proponents, I would kindly suggest reading the JSON spec | |
1114 | (which is not that difficult or long) and finally make YAML | |
1115 | compatible to it, and educating users about the changes, instead of | |
1116 | spreading lies about the real compatibility for many *years* and | |
1117 | trying to silence people who point out that it isn't true. | |
820 | 1118 | |
821 | 1119 | SPEED |
822 | 1120 | It seems that JSON::XS is surprisingly fast, as shown in the following |
825 | 1123 | system. |
826 | 1124 | |
827 | 1125 | First comes a comparison between various modules using a very short |
828 | single-line JSON string: | |
829 | ||
830 | {"method": "handleMessage", "params": ["user1", "we were just talking"], \ | |
831 | "id": null, "array":[1,11,234,-5,1e5,1e7, true, false]} | |
1126 | single-line JSON string (also available at | |
1127 | <http://dist.schmorp.de/misc/json/short.json>). | |
1128 | ||
1129 | {"method": "handleMessage", "params": ["user1", | |
1130 | "we were just talking"], "id": null, "array":[1,11,234,-5,1e5,1e7, | |
1131 | true, false]} | |
832 | 1132 | |
833 | 1133 | It shows the number of encodes/decodes per second (JSON::XS uses the |
834 | 1134 | functional interface, while JSON::XS/2 uses the OO interface with |
854 | 1154 | compares favourably to Storable for small amounts of data. |
855 | 1155 | |
856 | 1156 | Using a longer test string (roughly 18KB, generated from Yahoo! Locals |
857 | search API (http://nanoref.com/yahooapis/mgPdGg): | |
1157 | search API (<http://dist.schmorp.de/misc/json/long.json>). | |
858 | 1158 | |
859 | 1159 | module | encode | decode | |
860 | 1160 | -----------|------------|------------| |
901 | 1201 | machine with 8MB of stack size I can decode around 180k nested arrays |
902 | 1202 | but only 14k nested JSON objects (due to perl itself recursing deeply on |
903 | 1203 | croak to free the temporary). If that is exceeded, the program crashes. |
904 | to be conservative, the default nesting limit is set to 512. If your | |
1204 | To be conservative, the default nesting limit is set to 512. If your | |
905 | 1205 | process has a smaller stack, you should adjust this setting accordingly |
906 | 1206 | with the "max_depth" method. |
907 | 1207 | |
908 | And last but least, something else could bomb you that I forgot to think | |
909 | of. In that case, you get to keep the pieces. I am always open for | |
910 | hints, though... | |
1208 | Something else could bomb you, too, that I forgot to think of. In that | |
1209 | case, you get to keep the pieces. I am always open for hints, though... | |
1210 | ||
1211 | Also keep in mind that JSON::XS might leak contents of your Perl data | |
1212 | structures in its error messages, so when you serialise sensitive | |
1213 | information you might want to make sure that exceptions thrown by | |
1214 | JSON::XS will not end up in front of untrusted eyes. | |
911 | 1215 | |
912 | 1216 | If you are using JSON::XS to return packets to consumption by JavaScript |
913 | 1217 | scripts in a browser you should have a look at |
914 | 1218 | <http://jpsykes.com/47/practical-csrf-and-json-security> to see whether |
915 | 1219 | you are vulnerable to some common attack vectors (which really are |
916 | 1220 | browser design bugs, but it is still you who will have to deal with it, |
917 | as major browser developers care only for features, not about doing | |
1221 | as major browser developers care only for features, not about getting | |
918 | 1222 | security right). |
919 | 1223 | |
920 | 1224 | THREADS |
921 | 1225 | This module is *not* guaranteed to be thread safe and there are no plans |
922 | 1226 | to change this until Perl gets thread support (as opposed to the |
923 | 1227 | horribly slow so-called "threads" which are simply slow and bloated |
924 | process simulations - use fork, its *much* faster, cheaper, better). | |
1228 | process simulations - use fork, it's *much* faster, cheaper, better). | |
925 | 1229 | |
926 | 1230 | (It might actually work, but you have been warned). |
927 | 1231 | |
928 | 1232 | BUGS |
929 | 1233 | While the goal of this module is to be correct, that unfortunately does |
930 | not mean its bug-free, only that I think its design is bug-free. It is | |
931 | still relatively early in its development. If you keep reporting bugs | |
932 | they will be fixed swiftly, though. | |
1234 | not mean it's bug-free, only that I think its design is bug-free. If you | |
1235 | keep reporting bugs they will be fixed swiftly, though. | |
933 | 1236 | |
934 | 1237 | Please refrain from using rt.cpan.org or any other bug reporting |
935 | 1238 | service. I put the contact address into my modules for a reason. |
1239 | ||
1240 | SEE ALSO | |
1241 | The json_xs command line utility for quick experiments. | |
936 | 1242 | |
937 | 1243 | AUTHOR |
938 | 1244 | Marc Lehmann <schmorp@schmorp.de> |
0 | 0 | =head1 NAME |
1 | 1 | |
2 | 2 | JSON::XS - JSON serialising/deserialising, done correctly and fast |
3 | ||
4 | =encoding utf-8 | |
3 | 5 | |
4 | 6 | JSON::XS - 正しくて高速な JSON シリアライザ/デシリアライザ |
5 | 7 | (http://fleur.hio.jp/perldoc/mix/lib/JSON/XS.html) |
36 | 38 | |
37 | 39 | Beginning with version 2.0 of the JSON module, when both JSON and |
38 | 40 | JSON::XS are installed, then JSON will fall back on JSON::XS (this can be |
39 | overriden) with no overhead due to emulation (by inheritign constructor | |
41 | overridden) with no overhead due to emulation (by inheriting constructor | |
40 | 42 | and methods). If JSON::XS is not available, it will fall back to the |
41 | 43 | compatible JSON::PP module as backend, so using JSON instead of JSON::XS |
42 | 44 | gives you a portable JSON API that can be fast when you need and doesn't |
48 | 50 | their maintainers are unresponsive, gone missing, or not listening to bug |
49 | 51 | reports for other reasons. |
50 | 52 | |
51 | See COMPARISON, below, for a comparison to some other JSON modules. | |
52 | ||
53 | 53 | See MAPPING, below, on how JSON::XS maps perl values to JSON values and |
54 | 54 | vice versa. |
55 | 55 | |
59 | 59 | |
60 | 60 | =item * correct Unicode handling |
61 | 61 | |
62 | This module knows how to handle Unicode, and even documents how and when | |
63 | it does so. | |
62 | This module knows how to handle Unicode, documents how and when it does | |
63 | so, and even documents what "correct" means. | |
64 | 64 | |
65 | 65 | =item * round-trip integrity |
66 | 66 | |
67 | When you serialise a perl data structure using only datatypes supported | |
67 | When you serialise a perl data structure using only data types supported | |
68 | 68 | by JSON, the deserialised data structure is identical on the Perl level. |
69 | 69 | (e.g. the string "2.0" doesn't suddenly become "2" just because it looks |
70 | like a number). | |
70 | like a number). There I<are> minor exceptions to this, read the MAPPING | 
71 | section below to learn about those. | |
71 | 72 | |
72 | 73 | =item * strict checking of JSON correctness |
73 | 74 | |
77 | 78 | |
78 | 79 | =item * fast |
79 | 80 | |
80 | Compared to other JSON modules, this module compares favourably in terms | |
81 | of speed, too. | |
81 | Compared to other JSON modules and other serialisers such as Storable, | |
82 | this module usually compares favourably in terms of speed, too. | |
82 | 83 | |
83 | 84 | =item * simple to use |
84 | 85 | |
85 | This module has both a simple functional interface as well as an OO | |
86 | interface. | |
86 | This module has both a simple functional interface as well as an object | |
87 | oriented interface. | 
87 | 88 | |
88 | 89 | =item * reasonably versatile output formats |
89 | 90 | |
90 | You can choose between the most compact guaranteed single-line format | |
91 | possible (nice for simple line-based protocols), a pure-ascii format | |
91 | You can choose between the most compact guaranteed-single-line format | |
92 | possible (nice for simple line-based protocols), a pure-ASCII format | |
92 | 93 | (for when your transport is not 8-bit clean, still supports the whole |
93 | 94 | Unicode range), or a pretty-printed format (for when you want to read that |
94 | 95 | stuff). Or you can combine those features in whatever way you like. |
99 | 100 | |
100 | 101 | package JSON::XS; |
101 | 102 | |
103 | no warnings; | |
102 | 104 | use strict; |
103 | 105 | |
104 | our $VERSION = '2.01'; | |
106 | our $VERSION = '2.23'; | |
105 | 107 | our @ISA = qw(Exporter); |
106 | 108 | |
107 | 109 | our @EXPORT = qw(encode_json decode_json to_json from_json); |
135 | 137 | |
136 | 138 | $json_text = JSON::XS->new->utf8->encode ($perl_scalar) |
137 | 139 | |
138 | except being faster. | |
140 | Except being faster. | |
139 | 141 | |
140 | 142 | =item $perl_scalar = decode_json $json_text |
141 | 143 | |
147 | 149 | |
148 | 150 | $perl_scalar = JSON::XS->new->utf8->decode ($json_text) |
149 | 151 | |
150 | except being faster. | |
152 | Except being faster. | |
151 | 153 | |
152 | 154 | =item $is_boolean = JSON::XS::is_bool $scalar |
153 | 155 | |
175 | 177 | |
176 | 178 | =item 2. Perl does I<not> associate an encoding with your strings. |
177 | 179 | |
178 | Unless you force it to, e.g. when matching it against a regex, or printing | |
179 | the scalar to a file, in which case Perl either interprets your string as | |
180 | locale-encoded text, octets/binary, or as Unicode, depending on various | |
181 | settings. In no case is an encoding stored together with your data, it is | |
182 | I<use> that decides encoding, not any magical metadata. | |
180 | ... until you force it to, e.g. when matching it against a regex, or | |
181 | printing the scalar to a file, in which case Perl either interprets your | |
182 | string as locale-encoded text, octets/binary, or as Unicode, depending | |
183 | on various settings. In no case is an encoding stored together with your | |
184 | data, it is I<use> that decides encoding, not any magical meta data. | |
183 | 185 | |
184 | 186 | =item 3. The internal utf-8 flag has no meaning with regards to the |
185 | 187 | encoding of your string. |
195 | 197 | exist. |
196 | 198 | |
197 | 199 | =item 4. A "Unicode String" is simply a string where each character can be |
198 | validly interpreted as a Unicode codepoint. | |
200 | validly interpreted as a Unicode code point. | |
199 | 201 | |
200 | 202 | If you have UTF-8 encoded data, it is no longer a Unicode string, but a |
201 | 203 | Unicode string encoded in UTF-8, giving you a binary string. |
243 | 245 | characters unless required by the JSON syntax or other flags. This results |
244 | 246 | in a faster and more compact format. |
245 | 247 | |
248 | See also the section I<ENCODING/CODESET FLAG NOTES> later in this | |
249 | document. | |
250 | ||
246 | 251 | The main use for this flag is to produce JSON texts that can be |
247 | 252 | transmitted over a 7-bit channel, as the encoded JSON texts will not |
248 | 253 | contain any 8 bit characters. |
263 | 268 | |
264 | 269 | If C<$enable> is false, then the C<encode> method will not escape Unicode |
265 | 270 | characters unless required by the JSON syntax or other flags. |
271 | ||
272 | See also the section I<ENCODING/CODESET FLAG NOTES> later in this | |
273 | document. | |
266 | 274 | |
267 | 275 | The main use for this flag is efficiently encoding binary data as JSON |
268 | 276 | text, as most octets will not be escaped, resulting in a smaller encoded |
291 | 299 | string as a (non-encoded) Unicode string, while C<decode> expects thus a |
292 | 300 | Unicode string. Any decoding or encoding (e.g. to UTF-8 or UTF-16) needs |
293 | 301 | to be done yourself, e.g. using the Encode module. |
302 | ||
303 | See also the section I<ENCODING/CODESET FLAG NOTES> later in this | |
304 | document. | |
294 | 305 | |
295 | 306 | Example, output UTF-16BE-encoded JSON: |
296 | 307 | |
451 | 462 | |
452 | 463 | JSON::XS->new->allow_nonref->encode ("Hello, World!") |
453 | 464 | => "Hello, World!" |
465 | ||
466 | =item $json = $json->allow_unknown ([$enable]) | |
467 | ||
468 | =item $enabled = $json->get_allow_unknown | |
469 | ||
470 | If C<$enable> is true (or missing), then C<encode> will I<not> throw an | |
471 | exception when it encounters values it cannot represent in JSON (for | |
472 | example, filehandles) but instead will encode a JSON C<null> value. Note | |
473 | that blessed objects are not included here and are handled separately by | |
474 | C<allow_nonref>. | 
475 | ||
476 | If C<$enable> is false (the default), then C<encode> will throw an | |
477 | exception when it encounters anything it cannot encode as JSON. | |
478 | ||
479 | This option does not affect C<decode> in any way, and it is recommended to | |
480 | leave it off unless you know your communications partner. | |
454 | 481 | |
455 | 482 | =item $json = $json->allow_blessed ([$enable]) |
456 | 483 | |
601 | 628 | =item $max_depth = $json->get_max_depth |
602 | 629 | |
603 | 630 | Sets the maximum nesting level (default C<512>) accepted while encoding |
604 | or decoding. If the JSON text or Perl data structure has an equal or | |
605 | higher nesting level then this limit, then the encoder and decoder will | |
606 | stop and croak at that point. | |
631 | or decoding. If a higher nesting level is detected in JSON text or a Perl | |
632 | data structure, then the encoder and decoder will stop and croak at that | |
633 | point. | |
607 | 634 | |
608 | 635 | Nesting level is defined by number of hash- or arrayrefs that the encoder |
609 | 636 | needs to traverse to reach a given point or the number of C<{> or C<[> |
613 | 640 | Setting the maximum depth to one disallows any nesting, so that ensures |
614 | 641 | that the object is only a single hash/object or array. |
615 | 642 | |
616 | The argument to C<max_depth> will be rounded up to the next highest power | |
617 | of two. If no argument is given, the highest possible setting will be | |
618 | used, which is rarely useful. | |
643 | If no argument is given, the highest possible setting will be used, which | |
644 | is rarely useful. | |
645 | ||
646 | Note that nesting is implemented by recursion in C. The default value has | |
647 | been chosen to be as large as typical operating systems allow without | |
648 | crashing. | |
619 | 649 | |
620 | 650 | See SECURITY CONSIDERATIONS, below, for more info on why this is useful. |
621 | 651 | |
625 | 655 | |
626 | 656 | Set the maximum length a JSON text may have (in bytes) where decoding is |
627 | 657 | being attempted. The default is C<0>, meaning no limit. When C<decode> |
628 | is called on a string longer then this number of characters it will not | |
658 | is called on a string that is longer than this many bytes, it will not | 
629 | 659 | attempt to decode the string but throw an exception. This setting has no |
630 | 660 | effect on C<encode> (yet). |
631 | 661 | |
632 | The argument to C<max_size> will be rounded up to the next B<highest> | |
633 | power of two (so may be more than requested). If no argument is given, the | |
634 | limit check will be deactivated (same as when C<0> is specified). | |
662 | If no argument is given, the limit check will be deactivated (same as when | |
663 | C<0> is specified). | |
635 | 664 | |
636 | 665 | See SECURITY CONSIDERATIONS, below, for more info on why this is useful. |
637 | 666 | |
670 | 699 | =back |
671 | 700 | |
672 | 701 | |
702 | =head1 INCREMENTAL PARSING | |
703 | ||
704 | In some cases, there is the need for incremental parsing of JSON | |
705 | texts. While this module always has to keep both JSON text and resulting | |
706 | Perl data structure in memory at one time, it does allow you to parse a | |
707 | JSON stream incrementally. It does so by accumulating text until it has | |
708 | a full JSON object, which it then can decode. This process is similar to | |
709 | using C<decode_prefix> to see if a full JSON object is available, but | |
710 | is much more efficient (and can be implemented with a minimum of method | |
711 | calls). | |
712 | ||
713 | JSON::XS will only attempt to parse the JSON text once it is sure it | |
714 | has enough text to get a decisive result, using a very simple but | |
715 | truly incremental parser. This means that it sometimes won't stop as | |
716 | early as the full parser, for example, it doesn't detect parenthesis | 
717 | mismatches. The only thing it guarantees is that it starts decoding as | |
718 | soon as a syntactically valid JSON text has been seen. This means you need | |
719 | to set resource limits (e.g. C<max_size>) to ensure the parser will stop | |
720 | parsing in the presence if syntax errors. | |
721 | ||
722 | The following methods implement this incremental parser. | |
723 | ||
724 | =over 4 | |
725 | ||
726 | =item [void, scalar or list context] = $json->incr_parse ([$string]) | |
727 | ||
728 | This is the central parsing function. It can both append new text and | |
729 | extract objects from the stream accumulated so far (both of these | |
730 | functions are optional). | |
731 | ||
732 | If C<$string> is given, then this string is appended to the already | |
733 | existing JSON fragment stored in the C<$json> object. | |
734 | ||
735 | After that, if the function is called in void context, it will simply | |
736 | return without doing anything further. This can be used to add more text | |
737 | in as many chunks as you want. | |
738 | ||
739 | If the method is called in scalar context, then it will try to extract | |
740 | exactly I<one> JSON object. If that is successful, it will return this | |
741 | object, otherwise it will return C<undef>. If there is a parse error, | |
742 | this method will croak just as C<decode> would do (one can then use | |
743 | C<incr_skip> to skip the erroneous part). This is the most common way of | 
744 | using the method. | |
745 | ||
746 | And finally, in list context, it will try to extract as many objects | |
747 | from the stream as it can find and return them, or the empty list | |
748 | otherwise. For this to work, there must be no separators between the JSON | |
749 | objects or arrays, instead they must be concatenated back-to-back. If | |
750 | an error occurs, an exception will be raised as in the scalar context | |
751 | case. Note that in this case, any previously-parsed JSON texts will be | |
752 | lost. | |
753 | ||
754 | =item $lvalue_string = $json->incr_text | |
755 | ||
756 | This method returns the currently stored JSON fragment as an lvalue, that | |
757 | is, you can manipulate it. This I<only> works when a preceding call to | |
758 | C<incr_parse> in I<scalar context> successfully returned an object. Under | |
759 | all other circumstances you must not call this function (I mean it. | |
760 | although in simple tests it might actually work, it I<will> fail under | |
761 | real world conditions). As a special exception, you can also call this | |
762 | method before having parsed anything. | |
763 | ||
764 | This function is useful in two cases: a) finding the trailing text after a | |
765 | JSON object or b) parsing multiple JSON objects separated by non-JSON text | |
766 | (such as commas). | |
767 | ||
768 | =item $json->incr_skip | |
769 | ||
770 | This will reset the state of the incremental parser and will remove the | |
771 | parsed text from the input buffer. This is useful after C<incr_parse> | |
772 | died, in which case the input buffer and incremental parser state is left | |
773 | unchanged, to skip the text parsed so far and to reset the parse state. | |
774 | ||
775 | =item $json->incr_reset | |
776 | ||
777 | This completely resets the incremental parser, that is, after this call, | |
778 | it will be as if the parser had never parsed anything. | |
779 | ||
780 | This is useful if you want to repeatedly parse JSON objects and want to | 
781 | ignore any trailing data, which means you have to reset the parser after | |
782 | each successful decode. | |
783 | ||
784 | =back | |
785 | ||
786 | =head2 LIMITATIONS | |
787 | ||
788 | All options that affect decoding are supported, except | |
789 | C<allow_nonref>. The reason for this is that it cannot be made to | |
790 | work sensibly: JSON objects and arrays are self-delimited, i.e. you can concatenate | |
791 | them back to back and still decode them perfectly. This does not hold true | |
792 | for JSON numbers, however. | |
793 | ||
794 | For example, is the string C<1> a single JSON number, or is it simply the | |
795 | start of C<12>? Or is C<12> a single JSON number, or the concatenation | |
796 | of C<1> and C<2>? In neither case you can tell, and this is why JSON::XS | |
797 | takes the conservative route and disallows this case. | |
798 | ||
799 | =head2 EXAMPLES | |
800 | ||
801 | Some examples will make all this clearer. First, a simple example that | |
802 | works similarly to C<decode_prefix>: We want to decode the JSON object at | |
803 | the start of a string and identify the portion after the JSON object: | |
804 | ||
805 | my $text = "[1,2,3] hello"; | |
806 | ||
807 | my $json = new JSON::XS; | |
808 | ||
809 | my $obj = $json->incr_parse ($text) | |
810 | or die "expected JSON object or array at beginning of string"; | |
811 | ||
812 | my $tail = $json->incr_text; | |
813 | # $tail now contains " hello" | |
814 | ||
815 | Easy, isn't it? | |
816 | ||
817 | Now for a more complicated example: Imagine a hypothetical protocol where | |
818 | you read some requests from a TCP stream, and each request is a JSON | |
819 | array, without any separation between them (in fact, it is often useful to | |
820 | use newlines as "separators", as these get interpreted as whitespace at | |
821 | the start of the JSON text, which makes it possible to test said protocol | |
822 | with C<telnet>...). | |
823 | ||
824 | Here is how you'd do it (it is trivial to write this in an event-based | |
825 | manner): | |
826 | ||
827 | my $json = new JSON::XS; | |
828 | ||
829 | # read some data from the socket | |
830 | while (sysread $socket, my $buf, 4096) { | |
831 | ||
832 | # split and decode as many requests as possible | |
833 | for my $request ($json->incr_parse ($buf)) { | |
834 | # act on the $request | |
835 | } | |
836 | } | |
837 | ||
838 | Another complicated example: Assume you have a string with JSON objects | |
839 | or arrays, all separated by (optional) comma characters (e.g. C<[1],[2], | |
840 | [3]>). To parse them, we have to skip the commas between the JSON texts, | |
841 | and here is where the lvalue-ness of C<incr_text> comes in useful: | |
842 | ||
843 | my $text = "[1],[2], [3]"; | |
844 | my $json = new JSON::XS; | |
845 | ||
846 | # void context, so no parsing done | |
847 | $json->incr_parse ($text); | |
848 | ||
849 | # now extract as many objects as possible. note the | |
850 | # use of scalar context so incr_text can be called. | |
851 | while (my $obj = $json->incr_parse) { | |
852 | # do something with $obj | |
853 | ||
854 | # now skip the optional comma | |
855 | $json->incr_text =~ s/^ \s* , //x; | |
856 | } | |
857 | ||
858 | Now lets go for a very complex example: Assume that you have a gigantic | |
859 | JSON array-of-objects, many gigabytes in size, and you want to parse it, | |
860 | but you cannot load it into memory fully (this has actually happened in | |
861 | the real world :). | |
862 | ||
863 | Well, you lost, you have to implement your own JSON parser. But JSON::XS | |
864 | can still help you: You implement a (very simple) array parser and let | |
865 | JSON decode the array elements, which are all full JSON objects on their | |
866 | own (this wouldn't work if the array elements could be JSON numbers, for | |
867 | example): | |
868 | ||
869 | my $json = new JSON::XS; | |
870 | ||
871 | # open the monster | |
872 | open my $fh, "<bigfile.json" | |
873 | or die "bigfile: $!"; | |
874 | ||
875 | # first parse the initial "[" | |
876 | for (;;) { | |
877 | sysread $fh, my $buf, 65536 | |
878 | or die "read error: $!"; | |
879 | $json->incr_parse ($buf); # void context, so no parsing | |
880 | ||
881 | # Exit the loop once we found and removed(!) the initial "[". | |
882 | # In essence, we are (ab-)using the $json object as a simple scalar | |
883 | # we append data to. | |
884 | last if $json->incr_text =~ s/^ \s* \[ //x; | |
885 | } | |
886 | ||
887 | # now we have skipped the initial "[", so continue | |
888 | # parsing all the elements. | |
889 | for (;;) { | |
890 | # in this loop we read data until we got a single JSON object | |
891 | for (;;) { | |
892 | if (my $obj = $json->incr_parse) { | |
893 | # do something with $obj | |
894 | last; | |
895 | } | |
896 | ||
897 | # add more data | |
898 | sysread $fh, my $buf, 65536 | |
899 | or die "read error: $!"; | |
900 | $json->incr_parse ($buf); # void context, so no parsing | |
901 | } | |
902 | ||
903 | # in this loop we read data until we either found and parsed the | |
904 | # separating "," between elements, or the final "]" | |
905 | for (;;) { | |
906 | # first skip whitespace | |
907 | $json->incr_text =~ s/^\s*//; | |
908 | ||
909 | # if we find "]", we are done | |
910 | if ($json->incr_text =~ s/^\]//) { | |
911 | print "finished.\n"; | |
912 | exit; | |
913 | } | |
914 | ||
915 | # if we find ",", we can continue with the next element | |
916 | if ($json->incr_text =~ s/^,//) { | |
917 | last; | |
918 | } | |
919 | ||
920 | # if we find anything else, we have a parse error! | |
921 | if (length $json->incr_text) { | |
922 | die "parse error near ", $json->incr_text; | |
923 | } | |
924 | ||
925 | # else add more data | |
926 | sysread $fh, my $buf, 65536 | |
927 | or die "read error: $!"; | |
928 | $json->incr_parse ($buf); # void context, so no parsing | |
929 | } | |
930 | ||
931 | This is a complex example, but most of the complexity comes from the fact | |
932 | that we are trying to be correct (bear with me if I am wrong, I never ran | |
933 | the above example :). | |
934 | ||
935 | ||
936 | ||
673 | 937 | =head1 MAPPING |
674 | 938 | |
675 | 939 | This section describes how JSON::XS maps Perl values to JSON values and |
707 | 971 | string scalar in perl, depending on its range and any fractional parts. On |
708 | 972 | the Perl level, there is no difference between those as Perl handles all |
709 | 973 | the conversion details, but an integer may take slightly less memory and |
710 | might represent more values exactly than (floating point) numbers. | |
974 | might represent more values exactly than floating point numbers. | |
711 | 975 | |
712 | 976 | If the number consists of digits only, JSON::XS will try to represent |
713 | 977 | it as an integer value. If that fails, it will try to represent it as |
714 | 978 | a numeric (floating point) value if that is possible without loss of |
715 | precision. Otherwise it will preserve the number as a string value. | |
979 | precision. Otherwise it will preserve the number as a string value (in | |
980 | which case you lose roundtripping ability, as the JSON number will be | |
981 | re-encoded to a JSON string). | |
716 | 982 | |
717 | 983 | Numbers containing a fractional or exponential part will always be |
718 | 984 | represented as numeric (floating point) values, possibly at a loss of |
719 | precision. | |
720 | ||
721 | This might create round-tripping problems as numbers might become strings, | |
722 | but as Perl is typeless there is no other way to do it. | |
985 | precision (in which case you might lose perfect roundtripping ability, but | |
986 | the JSON number will still be re-encoded as a JSON number). | |
723 | 987 | |
724 | 988 | =item true, false |
725 | 989 | |
766 | 1030 | C<1>, which get turned into C<false> and C<true> atoms in JSON. You can |
767 | 1031 | also use C<JSON::XS::false> and C<JSON::XS::true> to improve readability. |
768 | 1032 | |
769 | encode_json [\0,JSON::XS::true] # yields [false,true] | |
1033 | encode_json [\0, JSON::XS::true] # yields [false,true] | |
770 | 1034 | |
771 | 1035 | =item JSON::XS::true, JSON::XS::false |
772 | 1036 | |
775 | 1039 | |
776 | 1040 | =item blessed objects |
777 | 1041 | |
778 | Blessed objects are not allowed. JSON::XS currently tries to encode their | |
779 | underlying representation (hash- or arrayref), but this behaviour might | |
780 | change in future versions. | |
1042 | Blessed objects are not directly representable in JSON. See the | |
1043 | C<allow_blessed> and C<convert_blessed> methods on various options on | |
1044 | how to deal with this: basically, you can choose between throwing an | |
1045 | exception, encoding the reference as if it weren't blessed, or provide | |
1046 | your own serialiser method. | |
781 | 1047 | |
782 | 1048 | =item simple scalars |
783 | 1049 | |
784 | 1050 | Simple Perl scalars (any scalar that is not a reference) are the most |
785 | 1051 | difficult objects to encode: JSON::XS will encode undefined scalars as |
786 | JSON null value, scalars that have last been used in a string context | |
787 | before encoding as JSON strings and anything else as number value: | |
1052 | JSON C<null> values, scalars that have last been used in a string context | |
1053 | before encoding as JSON strings, and anything else as number value: | |
788 | 1054 | |
789 | 1055 | # dump as number |
790 | 1056 | encode_json [2] # yields [2] |
812 | 1078 | $x *= 1; # same thing, the choice is yours. |
813 | 1079 | |
814 | 1080 | You can not currently force the type in other, less obscure, ways. Tell me |
815 | if you need this capability. | |
1081 | if you need this capability (but don't forget to explain why it's needed | |
1082 | :). | |
816 | 1083 | |
817 | 1084 | =back |
818 | 1085 | |
819 | 1086 | |
820 | =head1 COMPARISON | |
821 | ||
822 | As already mentioned, this module was created because none of the existing | |
823 | JSON modules could be made to work correctly. First I will describe the | |
824 | problems (or pleasures) I encountered with various existing JSON modules, | |
825 | followed by some benchmark values. JSON::XS was designed not to suffer | |
826 | from any of these problems or limitations. | |
1087 | =head1 ENCODING/CODESET FLAG NOTES | |
1088 | ||
1089 | The interested reader might have seen a number of flags that signify | |
1090 | encodings or codesets - C<utf8>, C<latin1> and C<ascii>. There seems to be | |
1091 | some confusion on what these do, so here is a short comparison: | |
1092 | ||
1093 | C<utf8> controls whether the JSON text created by C<encode> (and expected | |
1094 | by C<decode>) is UTF-8 encoded or not, while C<latin1> and C<ascii> only | |
1095 | control whether C<encode> escapes character values outside their respective | |
1096 | codeset range. Neither of these flags conflict with each other, although | |
1097 | some combinations make less sense than others. | |
1098 | ||
1099 | Care has been taken to make all flags symmetrical with respect to | |
1100 | C<encode> and C<decode>, that is, texts encoded with any combination of | |
1101 | these flag values will be correctly decoded when the same flags are used | |
1102 | - in general, if you use different flag settings while encoding vs. when | |
1103 | decoding you likely have a bug somewhere. | |
1104 | ||
1105 | Below comes a verbose discussion of these flags. Note that a "codeset" is | |
1106 | simply an abstract set of character-codepoint pairs, while an encoding | |
1107 | takes those codepoint numbers and I<encodes> them, in our case into | |
1108 | octets. Unicode is (among other things) a codeset, UTF-8 is an encoding, | |
1109 | and ISO-8859-1 (= latin 1) and ASCII are both codesets I<and> encodings at | |
1110 | the same time, which can be confusing. | |
827 | 1111 | |
828 | 1112 | =over 4 |
829 | 1113 | |
830 | =item JSON 1.07 | |
831 | ||
832 | Slow (but very portable, as it is written in pure Perl). | |
833 | ||
834 | Undocumented/buggy Unicode handling (how JSON handles Unicode values is | |
835 | undocumented. One can get far by feeding it Unicode strings and doing | |
836 | en-/decoding oneself, but Unicode escapes are not working properly). | |
837 | ||
838 | No round-tripping (strings get clobbered if they look like numbers, e.g. | |
839 | the string C<2.0> will encode to C<2.0> instead of C<"2.0">, and that will | |
840 | decode into the number 2). | |
841 | ||
842 | =item JSON::PC 0.01 | |
843 | ||
844 | Very fast. | |
845 | ||
846 | Undocumented/buggy Unicode handling. | |
847 | ||
848 | No round-tripping. | |
849 | ||
850 | Has problems handling many Perl values (e.g. regex results and other magic | |
851 | values will make it croak). | |
852 | ||
853 | Does not even generate valid JSON (C<{1,2}> gets converted to C<{1:2}> | |
854 | which is not a valid JSON text). | |
855 | ||
856 | Unmaintained (maintainer unresponsive for many months, bugs are not | |
857 | getting fixed). | |
858 | ||
859 | =item JSON::Syck 0.21 | |
860 | ||
861 | Very buggy (often crashes). | |
862 | ||
863 | Very inflexible (no human-readable format supported, format pretty much | |
864 | undocumented. I need at least a format for easy reading by humans and a | |
865 | single-line compact format for use in a protocol, and preferably a way to | |
866 | generate ASCII-only JSON texts). | |
867 | ||
868 | Completely broken (and confusingly documented) Unicode handling (Unicode | |
869 | escapes are not working properly, you need to set ImplicitUnicode to | |
870 | I<different> values on en- and decoding to get symmetric behaviour). | |
871 | ||
872 | No round-tripping (simple cases work, but this depends on whether the scalar | |
873 | value was used in a numeric context or not). | |
874 | ||
875 | Dumping hashes may skip hash values depending on iterator state. | |
876 | ||
877 | Unmaintained (maintainer unresponsive for many months, bugs are not | |
878 | getting fixed). | |
879 | ||
880 | Does not check input for validity (i.e. will accept non-JSON input and | |
881 | return "something" instead of raising an exception. This is a security | |
882 | issue: imagine two banks transferring money between each other using | |
883 | JSON. One bank might parse a given non-JSON request and deduct money, | |
884 | while the other might reject the transaction with a syntax error. While a | |
885 | good protocol will at least recover, that is extra unnecessary work and | |
886 | the transaction will still not succeed). | |
887 | ||
888 | =item JSON::DWIW 0.04 | |
889 | ||
890 | Very fast. Very natural. Very nice. | |
891 | ||
892 | Undocumented Unicode handling (but the best of the pack. Unicode escapes | |
893 | still don't get parsed properly). | |
894 | ||
895 | Very inflexible. | |
896 | ||
897 | No round-tripping. | |
898 | ||
899 | Does not generate valid JSON texts (key strings are often unquoted, empty keys | |
900 | result in nothing being output) | |
901 | ||
902 | Does not check input for validity. | |
1114 | =item C<utf8> flag disabled | |
1115 | ||
1116 | When C<utf8> is disabled (the default), then C<encode>/C<decode> generate | |
1117 | and expect Unicode strings, that is, characters with high ordinal Unicode | |
1118 | values (> 255) will be encoded as such characters, and likewise such | |
1119 | characters are decoded as-is, no changes to them will be done, except | |
1120 | "(re-)interpreting" them as Unicode codepoints or Unicode characters, | |
1121 | respectively (to Perl, these are the same thing in strings unless you do | |
1122 | funny/weird/dumb stuff). | |
1123 | ||
1124 | This is useful when you want to do the encoding yourself (e.g. when you | |
1125 | want to have UTF-16 encoded JSON texts) or when some other layer does | |
1126 | the encoding for you (for example, when printing to a terminal using a | |
1127 | filehandle that transparently encodes to UTF-8 you certainly do NOT want | |
1128 | to UTF-8 encode your data first and have Perl encode it another time). | |
1129 | ||
1130 | =item C<utf8> flag enabled | |
1131 | ||
1132 | If the C<utf8>-flag is enabled, C<encode>/C<decode> will encode all | |
1133 | characters using the corresponding UTF-8 multi-byte sequence, and will | |
1134 | expect your input strings to be encoded as UTF-8, that is, no "character" | |
1135 | of the input string must have any value > 255, as UTF-8 does not allow | |
1136 | that. | |
1137 | ||
1138 | The C<utf8> flag therefore switches between two modes: disabled means you | |
1139 | will get a Unicode string in Perl, enabled means you get an UTF-8 encoded | |
1140 | octet/binary string in Perl. | |
1141 | ||
1142 | =item C<latin1> or C<ascii> flags enabled | |
1143 | ||
1144 | With C<latin1> (or C<ascii>) enabled, C<encode> will escape characters | |
1145 | with ordinal values > 255 (> 127 with C<ascii>) and encode the remaining | |
1146 | characters as specified by the C<utf8> flag. | |
1147 | ||
1148 | If C<utf8> is disabled, then the result is also correctly encoded in those | |
1149 | character sets (as both are proper subsets of Unicode, meaning that a | |
1150 | Unicode string with all character values < 256 is the same thing as a | |
1151 | ISO-8859-1 string, and a Unicode string with all character values < 128 is | |
1152 | the same thing as an ASCII string in Perl). | |
1153 | ||
1154 | If C<utf8> is enabled, you still get a correct UTF-8-encoded string, | |
1155 | regardless of these flags, just some more characters will be escaped using | |
1156 | C<\uXXXX> than before. | |
1157 | ||
1158 | Note that ISO-8859-1-I<encoded> strings are not compatible with UTF-8 | |
1159 | encoding, while ASCII-encoded strings are. That is because the ISO-8859-1 | |
1160 | encoding is NOT a subset of UTF-8 (despite the ISO-8859-1 I<codeset> being | |
1161 | a subset of Unicode), while ASCII is. | |
1162 | ||
1163 | Surprisingly, C<decode> will ignore these flags and so treat all input | |
1164 | values as governed by the C<utf8> flag. If it is disabled, this allows you | |
1165 | to decode ISO-8859-1- and ASCII-encoded strings, as both strict subsets of | |
1166 | Unicode. If it is enabled, you can correctly decode UTF-8 encoded strings. | |
1167 | ||
1168 | So neither C<latin1> nor C<ascii> are incompatible with the C<utf8> flag - | |
1169 | they only govern when the JSON output engine escapes a character or not. | |
1170 | ||
1171 | The main use for C<latin1> is to relatively efficiently store binary data | |
1172 | as JSON, at the expense of breaking compatibility with most JSON decoders. | |
1173 | ||
1174 | The main use for C<ascii> is to force the output to not contain characters | |
1175 | with values > 127, which means you can interpret the resulting string | |
1176 | as UTF-8, ISO-8859-1, ASCII, KOI8-R or most about any character set and | |
1177 | 8-bit-encoding, and still get the same data structure back. This is useful | |
1178 | when your channel for JSON transfer is not 8-bit clean or the encoding | |
1179 | might be mangled in between (e.g. in mail), and works because ASCII is a | |
1180 | proper subset of most 8-bit and multibyte encodings in use in the world. | |
903 | 1181 | |
904 | 1182 | =back |
905 | 1183 | |
906 | 1184 | |
907 | 1185 | =head2 JSON and YAML |
908 | 1186 | |
909 | You often hear that JSON is a subset (or a close subset) of YAML. This is, | |
910 | however, a mass hysteria and very far from the truth. In general, there is | |
911 | no way to configure JSON::XS to output a data structure as valid YAML. | |
1187 | You often hear that JSON is a subset of YAML. This is, however, a mass | |
1188 | hysteria(*) and very far from the truth (as of the time of this writing), | |
1189 | so let me state it clearly: I<in general, there is no way to configure | |
1190 | JSON::XS to output a data structure as valid YAML> that works in all | |
1191 | cases. | |
912 | 1192 | |
913 | 1193 | If you really must use JSON::XS to generate YAML, you should use this |
914 | 1194 | algorithm (subject to change in future versions): |
916 | 1196 | my $to_yaml = JSON::XS->new->utf8->space_after (1); |
917 | 1197 | my $yaml = $to_yaml->encode ($ref) . "\n"; |
918 | 1198 | |
919 | This will usually generate JSON texts that also parse as valid | |
1199 | This will I<usually> generate JSON texts that also parse as valid | |
920 | 1200 | YAML. Please note that YAML has hardcoded limits on (simple) object key |
921 | lengths that JSON doesn't have, so you should make sure that your hash | |
922 | keys are noticeably shorter than the 1024 characters YAML allows. | |
923 | ||
924 | There might be other incompatibilities that I am not aware of. In general | |
925 | you should not try to generate YAML with a JSON generator or vice versa, | |
926 | or try to parse JSON with a YAML parser or vice versa: chances are high | |
927 | that you will run into severe interoperability problems. | |
1201 | lengths that JSON doesn't have and also has different and incompatible | |
1202 | unicode handling, so you should make sure that your hash keys are | |
1203 | noticeably shorter than the 1024 "stream characters" YAML allows and that | |
1204 | you do not have characters with codepoint values outside the Unicode BMP | |
1205 | (basic multilingual plane). YAML also does not allow C<\/> sequences in | |
1206 | strings (which JSON::XS does not I<currently> generate, but other JSON | |
1207 | generators might). | |
1208 | ||
1209 | There might be other incompatibilities that I am not aware of (or the YAML | |
1210 | specification has been changed yet again - it does so quite often). In | |
1211 | general you should not try to generate YAML with a JSON generator or vice | |
1212 | versa, or try to parse JSON with a YAML parser or vice versa: chances are | |
1213 | high that you will run into severe interoperability problems when you | |
1214 | least expect it. | |
1215 | ||
1216 | =over 4 | |
1217 | ||
1218 | =item (*) | |
1219 | ||
1220 | I have been pressured multiple times by Brian Ingerson (one of the | |
1221 | authors of the YAML specification) to remove this paragraph, despite him | |
1222 | acknowledging that the actual incompatibilities exist. As I was personally | |
1223 | bitten by this "JSON is YAML" lie, I refused and said I will continue to | |
1224 | educate people about these issues, so others do not run into the same | |
1225 | problem again and again. After this, Brian called me a (quote)I<complete | |
1226 | and worthless idiot>(unquote). | |
1227 | ||
1228 | In my opinion, instead of pressuring and insulting people who actually | |
1229 | clarify issues with YAML and the wrong statements of some of its | |
1230 | proponents, I would kindly suggest reading the JSON spec (which is not | |
1231 | that difficult or long) and finally make YAML compatible to it, and | |
1232 | educating users about the changes, instead of spreading lies about the | |
1233 | real compatibility for many I<years> and trying to silence people who | |
1234 | point out that it isn't true. | |
1235 | ||
1236 | =back | |
928 | 1237 | |
929 | 1238 | |
930 | 1239 | =head2 SPEED |
934 | 1243 | in the JSON::XS distribution, to make it easy to compare on your own |
935 | 1244 | system. |
936 | 1245 | |
937 | First comes a comparison between various modules using a very short | |
938 | single-line JSON string: | |
939 | ||
940 | {"method": "handleMessage", "params": ["user1", "we were just talking"], \ | |
941 | "id": null, "array":[1,11,234,-5,1e5,1e7, true, false]} | |
1246 | First comes a comparison between various modules using | |
1247 | a very short single-line JSON string (also available at | |
1248 | L<http://dist.schmorp.de/misc/json/short.json>). | |
1249 | ||
1250 | {"method": "handleMessage", "params": ["user1", | |
1251 | "we were just talking"], "id": null, "array":[1,11,234,-5,1e5,1e7, | |
1252 | true, false]} | |
942 | 1253 | |
943 | 1254 | It shows the number of encodes/decodes per second (JSON::XS uses |
944 | 1255 | the functional interface, while JSON::XS/2 uses the OO interface |
964 | 1275 | favourably to Storable for small amounts of data. |
965 | 1276 | |
966 | 1277 | Using a longer test string (roughly 18KB, generated from Yahoo! Locals |
967 | search API (http://nanoref.com/yahooapis/mgPdGg): | |
1278 | search API (L<http://dist.schmorp.de/misc/json/long.json>). | |
968 | 1279 | |
969 | 1280 | module | encode | decode | |
970 | 1281 | -----------|------------|------------| |
1011 | 1322 | arrays. The C stack is a limited resource: for instance, on my amd64 |
1012 | 1323 | machine with 8MB of stack size I can decode around 180k nested arrays but |
1013 | 1324 | only 14k nested JSON objects (due to perl itself recursing deeply on croak |
1014 | to free the temporary). If that is exceeded, the program crashes. to be | |
1325 | to free the temporary). If that is exceeded, the program crashes. To be | |
1015 | 1326 | conservative, the default nesting limit is set to 512. If your process |
1016 | 1327 | has a smaller stack, you should adjust this setting accordingly with the |
1017 | 1328 | C<max_depth> method. |
1018 | 1329 | |
1019 | And last but least, something else could bomb you that I forgot to think | |
1020 | of. In that case, you get to keep the pieces. I am always open for hints, | |
1021 | though... | |
1330 | Something else could bomb you, too, that I forgot to think of. In that | |
1331 | case, you get to keep the pieces. I am always open for hints, though... | |
1332 | ||
1333 | Also keep in mind that JSON::XS might leak contents of your Perl data | |
1334 | structures in its error messages, so when you serialise sensitive | |
1335 | information you might want to make sure that exceptions thrown by JSON::XS | |
1336 | will not end up in front of untrusted eyes. | |
1022 | 1337 | |
1023 | 1338 | If you are using JSON::XS to return packets to consumption |
1024 | 1339 | by JavaScript scripts in a browser you should have a look at |
1025 | 1340 | L<http://jpsykes.com/47/practical-csrf-and-json-security> to see whether |
1026 | 1341 | you are vulnerable to some common attack vectors (which really are browser |
1027 | 1342 | design bugs, but it is still you who will have to deal with it, as major |
1028 | browser developers care only for features, not about doing security | |
1343 | browser developers care only for features, not about getting security | |
1029 | 1344 | right). |
1030 | 1345 | |
1031 | 1346 | |
1034 | 1349 | This module is I<not> guaranteed to be thread safe and there are no |
1035 | 1350 | plans to change this until Perl gets thread support (as opposed to the |
1036 | 1351 | horribly slow so-called "threads" which are simply slow and bloated |
1037 | process simulations - use fork, its I<much> faster, cheaper, better). | |
1352 | process simulations - use fork, it's I<much> faster, cheaper, better). | |
1038 | 1353 | |
1039 | 1354 | (It might actually work, but you have been warned). |
1040 | 1355 | |
1042 | 1357 | =head1 BUGS |
1043 | 1358 | |
1044 | 1359 | While the goal of this module is to be correct, that unfortunately does |
1045 | not mean its bug-free, only that I think its design is bug-free. It is | |
1046 | still relatively early in its development. If you keep reporting bugs they | |
1047 | will be fixed swiftly, though. | |
1360 | not mean it's bug-free, only that I think its design is bug-free. If you | |
1361 | keep reporting bugs they will be fixed swiftly, though. | |
1048 | 1362 | |
1049 | 1363 | Please refrain from using rt.cpan.org or any other bug reporting |
1050 | 1364 | service. I put the contact address into my modules for a reason. |
1074 | 1388 | |
1075 | 1389 | 1; |
1076 | 1390 | |
1391 | =head1 SEE ALSO | |
1392 | ||
1393 | The F<json_xs> command line utility for quick experiments. | |
1394 | ||
1077 | 1395 | =head1 AUTHOR |
1078 | 1396 | |
1079 | 1397 | Marc Lehmann <schmorp@schmorp.de> |
5 | 5 | #include <string.h> |
6 | 6 | #include <stdlib.h> |
7 | 7 | #include <stdio.h> |
8 | #include <limits.h> | |
8 | 9 | #include <float.h> |
9 | 10 | |
10 | 11 | #if defined(__BORLANDC__) || defined(_MSC_VER) |
16 | 17 | #ifndef UTF8_MAXBYTES |
17 | 18 | # define UTF8_MAXBYTES 13 |
18 | 19 | #endif |
20 | ||
21 | #define IVUV_MAXCHARS (sizeof (UV) * CHAR_BIT * 28 / 93 + 2) | |
19 | 22 | |
20 | 23 | #define F_ASCII 0x00000001UL |
21 | 24 | #define F_LATIN1 0x00000002UL |
29 | 32 | #define F_ALLOW_BLESSED 0x00000400UL |
30 | 33 | #define F_CONV_BLESSED 0x00000800UL |
31 | 34 | #define F_RELAXED 0x00001000UL |
32 | ||
33 | #define F_MAXDEPTH 0xf8000000UL | |
34 | #define S_MAXDEPTH 27 | |
35 | #define F_MAXSIZE 0x01f00000UL | |
36 | #define S_MAXSIZE 20 | |
35 | #define F_ALLOW_UNKNOWN 0x00002000UL | |
37 | 36 | #define F_HOOK 0x00080000UL // some hooks exist, so slow-path processing |
38 | 37 | |
39 | #define DEC_DEPTH(flags) (1UL << ((flags & F_MAXDEPTH) >> S_MAXDEPTH)) | |
40 | #define DEC_SIZE(flags) (1UL << ((flags & F_MAXSIZE ) >> S_MAXSIZE )) | |
41 | ||
42 | 38 | #define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER |
43 | #define F_DEFAULT (9UL << S_MAXDEPTH) | |
44 | 39 | |
45 | 40 | #define INIT_SIZE 32 // initial scalar size to be allocated |
46 | 41 | #define INDENT_STEP 3 // spaces per indentation level |
51 | 46 | #define SE } while (0) |
52 | 47 | |
53 | 48 | #if __GNUC__ >= 3 |
54 | # define expect(expr,value) __builtin_expect ((expr),(value)) | |
55 | # define inline inline | |
49 | # define expect(expr,value) __builtin_expect ((expr), (value)) | |
50 | # define INLINE static inline | |
56 | 51 | #else |
57 | 52 | # define expect(expr,value) (expr) |
58 | # define inline static | |
53 | # define INLINE static | |
59 | 54 | #endif |
60 | 55 | |
61 | 56 | #define expect_false(expr) expect ((expr) != 0, 0) |
62 | 57 | #define expect_true(expr) expect ((expr) != 0, 1) |
58 | ||
59 | #define IN_RANGE_INC(type,val,beg,end) \ | |
60 | ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \ | |
61 | <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg))) | |
62 | ||
63 | #define ERR_NESTING_EXCEEDED "json text or perl structure exceeds maximum nesting level (max_depth set too low?)" | |
63 | 64 | |
64 | 65 | #ifdef USE_ITHREADS |
65 | 66 | # define JSON_SLOW 1 |
72 | 73 | static HV *json_stash, *json_boolean_stash; // JSON::XS:: |
73 | 74 | static SV *json_true, *json_false; |
74 | 75 | |
76 | enum { | |
77 | INCR_M_WS = 0, // initial whitespace skipping, must be 0 | |
78 | INCR_M_STR, // inside string | |
79 | INCR_M_BS, // inside backslash | |
80 | INCR_M_JSON // outside anything, count nesting | |
81 | }; | |
82 | ||
83 | #define INCR_DONE(json) ((json)->incr_nest <= 0 && (json)->incr_mode == INCR_M_JSON) | |
84 | ||
75 | 85 | typedef struct { |
76 | 86 | U32 flags; |
87 | U32 max_depth; | |
88 | STRLEN max_size; | |
89 | ||
77 | 90 | SV *cb_object; |
78 | 91 | HV *cb_sk_object; |
92 | ||
93 | // for the incremental parser | |
94 | SV *incr_text; // the source text so far | |
95 | STRLEN incr_pos; // the current offset into the text | |
96 | int incr_nest; // {[]}-nesting level | |
97 | unsigned char incr_mode; | |
79 | 98 | } JSON; |
99 | ||
100 | INLINE void | |
101 | json_init (JSON *json) | |
102 | { | |
103 | Zero (json, 1, JSON); | |
104 | json->max_depth = 512; | |
105 | } | |
80 | 106 | |
81 | 107 | ///////////////////////////////////////////////////////////////////////////// |
82 | 108 | // utility functions |
83 | 109 | |
84 | inline void | |
110 | INLINE SV * | |
111 | get_bool (const char *name) | |
112 | { | |
113 | SV *sv = get_sv (name, 1); | |
114 | ||
115 | SvREADONLY_on (sv); | |
116 | SvREADONLY_on (SvRV (sv)); | |
117 | ||
118 | return sv; | |
119 | } | |
120 | ||
121 | INLINE void | |
85 | 122 | shrink (SV *sv) |
86 | 123 | { |
87 | 124 | sv_utf8_downgrade (sv, 1); |
125 | ||
88 | 126 | if (SvLEN (sv) > SvCUR (sv) + 1) |
89 | 127 | { |
90 | 128 | #ifdef SvPV_shrink_to_cur |
100 | 138 | // we special-case "safe" characters from U+80 .. U+7FF, |
101 | 139 | // but use the very good perl function to parse anything else. |
102 | 140 | // note that we never call this function for a ascii codepoints |
103 | inline UV | |
141 | INLINE UV | |
104 | 142 | decode_utf8 (unsigned char *s, STRLEN len, STRLEN *clen) |
105 | 143 | { |
106 | if (expect_false (s[0] > 0xdf || s[0] < 0xc2)) | |
107 | return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); | |
108 | else if (len > 1 && s[1] >= 0x80 && s[1] <= 0xbf) | |
144 | if (expect_true (len >= 2 | |
145 | && IN_RANGE_INC (char, s[0], 0xc2, 0xdf) | |
146 | && IN_RANGE_INC (char, s[1], 0x80, 0xbf))) | |
109 | 147 | { |
110 | 148 | *clen = 2; |
111 | 149 | return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); |
112 | 150 | } |
113 | 151 | else |
114 | { | |
115 | *clen = (STRLEN)-1; | |
116 | return (UV)-1; | |
117 | } | |
152 | return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); | |
153 | } | |
154 | ||
155 | // likewise for encoding, also never called for ascii codepoints | |
156 | // this function takes advantage of this fact, although current gccs | |
157 | // seem to optimise the check for >= 0x80 away anyways | |
158 | INLINE unsigned char * | |
159 | encode_utf8 (unsigned char *s, UV ch) | |
160 | { | |
161 | if (expect_false (ch < 0x000080)) | |
162 | *s++ = ch; | |
163 | else if (expect_true (ch < 0x000800)) | |
164 | *s++ = 0xc0 | ( ch >> 6), | |
165 | *s++ = 0x80 | ( ch & 0x3f); | |
166 | else if ( ch < 0x010000) | |
167 | *s++ = 0xe0 | ( ch >> 12), | |
168 | *s++ = 0x80 | ((ch >> 6) & 0x3f), | |
169 | *s++ = 0x80 | ( ch & 0x3f); | |
170 | else if ( ch < 0x110000) | |
171 | *s++ = 0xf0 | ( ch >> 18), | |
172 | *s++ = 0x80 | ((ch >> 12) & 0x3f), | |
173 | *s++ = 0x80 | ((ch >> 6) & 0x3f), | |
174 | *s++ = 0x80 | ( ch & 0x3f); | |
175 | ||
176 | return s; | |
118 | 177 | } |
119 | 178 | |
120 | 179 | ///////////////////////////////////////////////////////////////////////////// |
128 | 187 | SV *sv; // result scalar |
129 | 188 | JSON json; |
130 | 189 | U32 indent; // indentation level |
131 | U32 maxdepth; // max. indentation/recursion level | |
190 | UV limit; // escape character values >= this value when encoding | |
132 | 191 | } enc_t; |
133 | 192 | |
134 | inline void | |
193 | INLINE void | |
135 | 194 | need (enc_t *enc, STRLEN len) |
136 | 195 | { |
137 | 196 | if (expect_false (enc->cur + len >= enc->end)) |
138 | 197 | { |
139 | STRLEN cur = enc->cur - SvPVX (enc->sv); | |
198 | STRLEN cur = enc->cur - (char *)SvPVX (enc->sv); | |
140 | 199 | SvGROW (enc->sv, cur + len + 1); |
141 | 200 | enc->cur = SvPVX (enc->sv) + cur; |
142 | 201 | enc->end = SvPVX (enc->sv) + SvLEN (enc->sv) - 1; |
143 | 202 | } |
144 | 203 | } |
145 | 204 | |
146 | inline void | |
205 | INLINE void | |
147 | 206 | encode_ch (enc_t *enc, char ch) |
148 | 207 | { |
149 | 208 | need (enc, 1); |
207 | 266 | clen = 1; |
208 | 267 | } |
209 | 268 | |
210 | if (uch > 0x10FFFFUL) | |
211 | croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); | |
212 | ||
213 | if (uch < 0x80 || enc->json.flags & F_ASCII || (enc->json.flags & F_LATIN1 && uch > 0xFF)) | |
269 | if (uch < 0x80/*0x20*/ || uch >= enc->limit) | |
214 | 270 | { |
215 | if (uch > 0xFFFFUL) | |
271 | if (uch >= 0x10000UL) | |
216 | 272 | { |
273 | if (uch >= 0x110000UL) | |
274 | croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch); | |
275 | ||
217 | 276 | need (enc, len += 11); |
218 | 277 | sprintf (enc->cur, "\\u%04x\\u%04x", |
219 | 278 | (int)((uch - 0x10000) / 0x400 + 0xD800), |
222 | 281 | } |
223 | 282 | else |
224 | 283 | { |
225 | static char hexdigit [16] = "0123456789abcdef"; | |
226 | 284 | need (enc, len += 5); |
227 | 285 | *enc->cur++ = '\\'; |
228 | 286 | *enc->cur++ = 'u'; |
229 | *enc->cur++ = hexdigit [ uch >> 12 ]; | |
230 | *enc->cur++ = hexdigit [(uch >> 8) & 15]; | |
231 | *enc->cur++ = hexdigit [(uch >> 4) & 15]; | |
232 | *enc->cur++ = hexdigit [(uch >> 0) & 15]; | |
287 | *enc->cur++ = PL_hexdigit [ uch >> 12 ]; | |
288 | *enc->cur++ = PL_hexdigit [(uch >> 8) & 15]; | |
289 | *enc->cur++ = PL_hexdigit [(uch >> 4) & 15]; | |
290 | *enc->cur++ = PL_hexdigit [(uch >> 0) & 15]; | |
233 | 291 | } |
234 | 292 | |
235 | 293 | str += clen; |
251 | 309 | else |
252 | 310 | { |
253 | 311 | need (enc, len += UTF8_MAXBYTES - 1); // never more than 11 bytes needed |
254 | enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0); | |
312 | enc->cur = encode_utf8 (enc->cur, uch); | |
255 | 313 | ++str; |
256 | 314 | } |
257 | 315 | } |
262 | 320 | } |
263 | 321 | } |
264 | 322 | |
265 | inline void | |
323 | INLINE void | |
266 | 324 | encode_indent (enc_t *enc) |
267 | 325 | { |
268 | 326 | if (enc->json.flags & F_INDENT) |
275 | 333 | } |
276 | 334 | } |
277 | 335 | |
278 | inline void | |
/* Append a single space character to the encoder output. */
INLINE void
encode_space (enc_t *enc)
{
  need (enc, 1); /* encode_ch reserves space itself; kept for parity with callers */
  encode_ch (enc, ' ');
}
284 | 342 | |
285 | inline void | |
343 | INLINE void | |
286 | 344 | encode_nl (enc_t *enc) |
287 | 345 | { |
288 | 346 | if (enc->json.flags & F_INDENT) |
292 | 350 | } |
293 | 351 | } |
294 | 352 | |
295 | inline void | |
353 | INLINE void | |
296 | 354 | encode_comma (enc_t *enc) |
297 | 355 | { |
298 | 356 | encode_ch (enc, ','); |
310 | 368 | { |
311 | 369 | int i, len = av_len (av); |
312 | 370 | |
313 | if (enc->indent >= enc->maxdepth) | |
314 | croak ("data structure too deep (hit recursion limit)"); | |
371 | if (enc->indent >= enc->json.max_depth) | |
372 | croak (ERR_NESTING_EXCEEDED); | |
315 | 373 | |
316 | 374 | encode_ch (enc, '['); |
317 | 375 | |
395 | 453 | encode_hv (enc_t *enc, HV *hv) |
396 | 454 | { |
397 | 455 | HE *he; |
398 | int count; | |
399 | ||
400 | if (enc->indent >= enc->maxdepth) | |
401 | croak ("data structure too deep (hit recursion limit)"); | |
456 | ||
457 | if (enc->indent >= enc->json.max_depth) | |
458 | croak (ERR_NESTING_EXCEEDED); | |
402 | 459 | |
403 | 460 | encode_ch (enc, '{'); |
404 | 461 | |
405 | 462 | // for canonical output we have to sort by keys first |
406 | 463 | // actually, this is mostly due to the stupid so-called |
407 | // security workaround added somewhere in 5.8.x. | |
464 | // security workaround added somewhere in 5.8.x | |
408 | 465 | // that randomises hash orderings |
409 | 466 | if (enc->json.flags & F_CANONICAL) |
410 | 467 | { |
589 | 646 | encode_str (enc, "true", 4, 0); |
590 | 647 | else if (len == 1 && *pv == '0') |
591 | 648 | encode_str (enc, "false", 5, 0); |
649 | else if (enc->json.flags & F_ALLOW_UNKNOWN) | |
650 | encode_str (enc, "null", 4, 0); | |
592 | 651 | else |
593 | 652 | croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1", |
594 | 653 | SvPV_nolen (sv_2mortal (newRV_inc (sv)))); |
595 | 654 | } |
655 | else if (enc->json.flags & F_ALLOW_UNKNOWN) | |
656 | encode_str (enc, "null", 4, 0); | |
596 | 657 | else |
597 | 658 | croak ("encountered %s, but JSON can only represent references to arrays or hashes", |
598 | 659 | SvPV_nolen (sv_2mortal (newRV_inc (sv)))); |
620 | 681 | } |
621 | 682 | else if (SvIOKp (sv)) |
622 | 683 | { |
623 | // we assume we can always read an IV as a UV | |
624 | if (SvUV (sv) & ~(UV)0x7fff) | |
625 | { | |
626 | // large integer, use the (rather slow) snprintf way. | |
627 | need (enc, sizeof (UV) * 3); | |
628 | enc->cur += | |
629 | SvIsUV(sv) | |
630 | ? snprintf (enc->cur, sizeof (UV) * 3, "%"UVuf, (UV)SvUVX (sv)) | |
631 | : snprintf (enc->cur, sizeof (UV) * 3, "%"IVdf, (IV)SvIVX (sv)); | |
632 | } | |
633 | else | |
684 | // we assume we can always read an IV as a UV and vice versa | |
685 | // we assume two's complement | |
686 | // we assume no aliasing issues in the union | |
687 | if (SvIsUV (sv) ? SvUVX (sv) <= 59000 | |
688 | : SvIVX (sv) <= 59000 && SvIVX (sv) >= -59000) | |
634 | 689 | { |
635 | 690 | // optimise the "small number case" |
636 | 691 | // code will likely be branchless and use only a single multiplication |
637 | I32 i = SvIV (sv); | |
692 | // works for numbers up to 59074 | |
693 | I32 i = SvIVX (sv); | |
638 | 694 | U32 u; |
639 | 695 | char digit, nz = 0; |
640 | 696 | |
650 | 706 | // and multiplying by 5 while moving the decimal point one to the right, |
651 | 707 | // resulting in a net multiplication by 10. |
652 | 708 | // we always write the digit to memory but conditionally increment |
653 | // the pointer, to ease the usage of conditional move instructions. | |
654 | digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffff) * 5; | |
655 | digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffff) * 5; | |
656 | digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffff) * 5; | |
657 | digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffff) * 5; | |
709 | // the pointer, to enable the use of conditional move instructions. | |
710 | digit = u >> 28; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0xfffffffUL) * 5; | |
711 | digit = u >> 27; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x7ffffffUL) * 5; | |
712 | digit = u >> 26; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x3ffffffUL) * 5; | |
713 | digit = u >> 25; *enc->cur = digit + '0'; enc->cur += (nz = nz || digit); u = (u & 0x1ffffffUL) * 5; | |
658 | 714 | digit = u >> 24; *enc->cur = digit + '0'; enc->cur += 1; // correctly generate '0' |
659 | 715 | } |
716 | else | |
717 | { | |
718 | // large integer, use the (rather slow) snprintf way. | |
719 | need (enc, IVUV_MAXCHARS); | |
720 | enc->cur += | |
721 | SvIsUV(sv) | |
722 | ? snprintf (enc->cur, IVUV_MAXCHARS, "%"UVuf, (UV)SvUVX (sv)) | |
723 | : snprintf (enc->cur, IVUV_MAXCHARS, "%"IVdf, (IV)SvIVX (sv)); | |
724 | } | |
660 | 725 | } |
661 | 726 | else if (SvROK (sv)) |
662 | 727 | encode_rv (enc, SvRV (sv)); |
663 | else if (!SvOK (sv)) | |
728 | else if (!SvOK (sv) || enc->json.flags & F_ALLOW_UNKNOWN) | |
664 | 729 | encode_str (enc, "null", 4, 0); |
665 | 730 | else |
666 | 731 | croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this", |
680 | 745 | enc.cur = SvPVX (enc.sv); |
681 | 746 | enc.end = SvEND (enc.sv); |
682 | 747 | enc.indent = 0; |
683 | enc.maxdepth = DEC_DEPTH (enc.json.flags); | |
748 | enc.limit = enc.json.flags & F_ASCII ? 0x000080UL | |
749 | : enc.json.flags & F_LATIN1 ? 0x000100UL | |
750 | : 0x110000UL; | |
684 | 751 | |
685 | 752 | SvPOK_only (enc.sv); |
686 | 753 | encode_sv (&enc, scalar); |
711 | 778 | U32 maxdepth; // recursion depth limit |
712 | 779 | } dec_t; |
713 | 780 | |
714 | inline void | |
781 | INLINE void | |
715 | 782 | decode_comment (dec_t *dec) |
716 | 783 | { |
717 | 784 | // only '#'-style comments allowed a.t.m. |
720 | 787 | ++dec->cur; |
721 | 788 | } |
722 | 789 | |
723 | inline void | |
790 | INLINE void | |
724 | 791 | decode_ws (dec_t *dec) |
725 | 792 | { |
726 | 793 | for (;;) |
754 | 821 | ++dec->cur; \ |
755 | 822 | SE |
756 | 823 | |
757 | #define DEC_INC_DEPTH if (++dec->depth > dec->maxdepth) ERR ("json datastructure exceeds maximum nesting level (set a higher max_depth)") | |
824 | #define DEC_INC_DEPTH if (++dec->depth > dec->json.max_depth) ERR (ERR_NESTING_EXCEEDED) | |
758 | 825 | #define DEC_DEC_DEPTH --dec->depth |
759 | 826 | |
760 | 827 | static SV *decode_sv (dec_t *dec); |
856 | 923 | { |
857 | 924 | utf8 = 1; |
858 | 925 | |
859 | cur = (char *)uvuni_to_utf8_flags (cur, hi, 0); | |
926 | cur = encode_utf8 (cur, hi); | |
860 | 927 | } |
861 | 928 | else |
862 | 929 | *cur++ = hi; |
868 | 935 | ERR ("illegal backslash escape sequence in string"); |
869 | 936 | } |
870 | 937 | } |
871 | else if (expect_true (ch >= 0x20 && ch <= 0x7f)) | |
938 | else if (expect_true (ch >= 0x20 && ch < 0x80)) | |
872 | 939 | *cur++ = ch; |
873 | 940 | else if (ch >= 0x80) |
874 | 941 | { |
1001 | 1068 | { |
1002 | 1069 | int len = dec->cur - start; |
1003 | 1070 | |
1004 | // special case the rather common 1..4-digit-int case, assumes 32 bit ints or so | |
1071 | // special case the rather common 1..5-digit-int case | |
1005 | 1072 | if (*start == '-') |
1006 | 1073 | switch (len) |
1007 | 1074 | { |
1008 | case 2: return newSViv (-( start [1] - '0' * 1)); | |
1009 | case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11)); | |
1010 | case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); | |
1011 | case 5: return newSViv (-(start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); | |
1075 | case 2: return newSViv (-( start [1] - '0' * 1)); | |
1076 | case 3: return newSViv (-( start [1] * 10 + start [2] - '0' * 11)); | |
1077 | case 4: return newSViv (-( start [1] * 100 + start [2] * 10 + start [3] - '0' * 111)); | |
1078 | case 5: return newSViv (-( start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 1111)); | |
1079 | case 6: return newSViv (-(start [1] * 10000 + start [2] * 1000 + start [3] * 100 + start [4] * 10 + start [5] - '0' * 11111)); | |
1012 | 1080 | } |
1013 | 1081 | else |
1014 | 1082 | switch (len) |
1015 | 1083 | { |
1016 | case 1: return newSViv ( start [0] - '0' * 1); | |
1017 | case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); | |
1018 | case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); | |
1019 | case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); | |
1084 | case 1: return newSViv ( start [0] - '0' * 1); | |
1085 | case 2: return newSViv ( start [0] * 10 + start [1] - '0' * 11); | |
1086 | case 3: return newSViv ( start [0] * 100 + start [1] * 10 + start [2] - '0' * 111); | |
1087 | case 4: return newSViv ( start [0] * 1000 + start [1] * 100 + start [2] * 10 + start [3] - '0' * 1111); | |
1088 | case 5: return newSViv ( start [0] * 10000 + start [1] * 1000 + start [2] * 100 + start [3] * 10 + start [4] - '0' * 11111); | |
1020 | 1089 | } |
1021 | 1090 | |
1022 | 1091 | { |
1134 | 1203 | |
1135 | 1204 | for (;;) |
1136 | 1205 | { |
1137 | // the >= 0x80 is true on most architectures | |
1206 | // the >= 0x80 is false on most architectures | |
1138 | 1207 | if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\') |
1139 | 1208 | { |
1140 | 1209 | // slow path, back up and use decode_str |
1275 | 1344 | decode_sv (dec_t *dec) |
1276 | 1345 | { |
1277 | 1346 | // the beauty of JSON: you need exactly one character lookahead |
1278 | // to parse anything. | |
1347 | // to parse everything. | |
1279 | 1348 | switch (*dec->cur) |
1280 | 1349 | { |
1281 | 1350 | case '"': ++dec->cur; return decode_str (dec); |
1282 | case '[': ++dec->cur; return decode_av (dec); | |
1283 | case '{': ++dec->cur; return decode_hv (dec); | |
1351 | case '[': ++dec->cur; return decode_av (dec); | |
1352 | case '{': ++dec->cur; return decode_hv (dec); | |
1284 | 1353 | |
1285 | 1354 | case '-': |
1286 | 1355 | case '0': case '1': case '2': case '3': case '4': |
1292 | 1361 | { |
1293 | 1362 | dec->cur += 4; |
1294 | 1363 | #if JSON_SLOW |
1295 | json_true = get_sv ("JSON::XS::true", 1); SvREADONLY_on (json_true); | |
1364 | json_true = get_bool ("JSON::XS::true"); | |
1296 | 1365 | #endif |
1297 | return SvREFCNT_inc (json_true); | |
1366 | return newSVsv (json_true); | |
1298 | 1367 | } |
1299 | 1368 | else |
1300 | 1369 | ERR ("'true' expected"); |
1306 | 1375 | { |
1307 | 1376 | dec->cur += 5; |
1308 | 1377 | #if JSON_SLOW |
1309 | json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false); | |
1378 | json_false = get_bool ("JSON::XS::false"); | |
1310 | 1379 | #endif |
1311 | return SvREFCNT_inc (json_false); | |
1380 | return newSVsv (json_false); | |
1312 | 1381 | } |
1313 | 1382 | else |
1314 | 1383 | ERR ("'false' expected"); |
1336 | 1405 | } |
1337 | 1406 | |
1338 | 1407 | static SV * |
1339 | decode_json (SV *string, JSON *json, UV *offset_return) | |
1408 | decode_json (SV *string, JSON *json, STRLEN *offset_return) | |
1340 | 1409 | { |
1341 | 1410 | dec_t dec; |
1342 | UV offset; | |
1411 | STRLEN offset; | |
1343 | 1412 | SV *sv; |
1344 | 1413 | |
1345 | 1414 | SvGETMAGIC (string); |
1346 | 1415 | SvUPGRADE (string, SVt_PV); |
1347 | 1416 | |
1348 | if (json->flags & F_MAXSIZE && SvCUR (string) > DEC_SIZE (json->flags)) | |
1417 | /* work around a bug in perl 5.10, which causes SvCUR to fail an | |
1418 | * assertion with -DDEBUGGING, although SvCUR is documented to | |
1419 | * return the xpv_cur field which certainly exists after upgrading. | |
1420 | * according to nicholas clark, calling SvPOK fixes this. | |
1421 | * But it doesn't fix it, so try another workaround, call SvPV_nolen | |
1422 | * and hope for the best. | |
1423 | * Damnit, SvPV_nolen still trips over yet another assertion. This | |
1424 | * assertion business is seriously broken, try yet another workaround | |
1425 | * for the broken -DDEBUGGING. | |
1426 | */ | |
1427 | #ifdef DEBUGGING | |
1428 | offset = SvOK (string) ? sv_len (string) : 0; | |
1429 | #else | |
1430 | offset = SvCUR (string); | |
1431 | #endif | |
1432 | ||
1433 | if (offset > json->max_size && json->max_size) | |
1349 | 1434 | croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu", |
1350 | (unsigned long)SvCUR (string), (unsigned long)DEC_SIZE (json->flags)); | |
1435 | (unsigned long)SvCUR (string), (unsigned long)json->max_size); | |
1351 | 1436 | |
1352 | 1437 | if (json->flags & F_UTF8) |
1353 | 1438 | sv_utf8_downgrade (string, 0); |
1356 | 1441 | |
1357 | 1442 | SvGROW (string, SvCUR (string) + 1); // should basically be a NOP |
1358 | 1443 | |
1359 | dec.json = *json; | |
1360 | dec.cur = SvPVX (string); | |
1361 | dec.end = SvEND (string); | |
1362 | dec.err = 0; | |
1363 | dec.depth = 0; | |
1364 | dec.maxdepth = DEC_DEPTH (dec.json.flags); | |
1444 | dec.json = *json; | |
1445 | dec.cur = SvPVX (string); | |
1446 | dec.end = SvEND (string); | |
1447 | dec.err = 0; | |
1448 | dec.depth = 0; | |
1365 | 1449 | |
1366 | 1450 | if (dec.json.cb_object || dec.json.cb_sk_object) |
1367 | 1451 | dec.json.flags |= F_HOOK; |
1422 | 1506 | } |
1423 | 1507 | |
1424 | 1508 | ///////////////////////////////////////////////////////////////////////////// |
1509 | // incremental parser | |
1510 | ||
// Incrementally scan the text accumulated in self->incr_text, starting at
// self->incr_pos, for the end of one complete top-level JSON value. This
// only tracks string/escape state and {}/[] nesting depth - no actual
// parsing or validation happens here (decode_json does that later). On
// return, incr_pos points at wherever scanning stopped (end of available
// input, or just past a complete value) and incr_mode/incr_nest hold the
// state needed to resume when more data arrives.
static void
incr_parse (JSON *self)
{
  const char *p = SvPVX (self->incr_text) + self->incr_pos;

  // NOTE(review): the text is NUL-terminated (callers ensure this), so a
  // 0 byte always means "out of data", never valid JSON content.
  for (;;)
    {
      //printf ("loop pod %d *p<%c><%s>, mode %d nest %d\n", p - SvPVX (self->incr_text), *p, p, self->incr_mode, self->incr_nest);//D
      switch (self->incr_mode)
        {
          // only used for initial whitespace skipping
          case INCR_M_WS:
            for (;;)
              {
                if (*p > 0x20)
                  {
                    self->incr_mode = INCR_M_JSON;
                    goto incr_m_json;
                  }
                else if (!*p)
                  goto interrupt;

                ++p;
              }

          // skip a single char inside a string (for \\-processing)
          case INCR_M_BS:
            if (!*p)
              goto interrupt;

            ++p;
            self->incr_mode = INCR_M_STR;
            goto incr_m_str;

          // inside a string
          case INCR_M_STR:
          incr_m_str:
            for (;;)
              {
                if (*p == '"')
                  {
                    // closing quote: back to structural scanning; a bare
                    // top-level string (nest == 0) is a complete value
                    ++p;
                    self->incr_mode = INCR_M_JSON;

                    if (!self->incr_nest)
                      goto interrupt;

                    goto incr_m_json;
                  }
                else if (*p == '\\')
                  {
                    ++p; // "virtually" consumes character after \

                    if (!*p) // if at end of string we have to switch modes
                      {
                        self->incr_mode = INCR_M_BS;
                        goto interrupt;
                      }
                  }
                else if (!*p)
                  goto interrupt;

                ++p;
              }

          // after initial ws, outside string
          case INCR_M_JSON:
          incr_m_json:
            for (;;)
              {
                switch (*p++)
                  {
                    case 0:
                      --p; // un-consume the NUL so incr_pos stays correct
                      goto interrupt;

                    case 0x09:
                    case 0x0a:
                    case 0x0d:
                    case 0x20:
                      if (!self->incr_nest)
                        {
                          --p; // do not eat the whitespace, let the next round do it
                          goto interrupt;
                        }
                      break;

                    case '"':
                      self->incr_mode = INCR_M_STR;
                      goto incr_m_str;

                    case '[':
                    case '{':
                      // enforce the nesting limit even before real parsing,
                      // so hostile input cannot force unbounded buffering
                      if (++self->incr_nest > self->max_depth)
                        croak (ERR_NESTING_EXCEEDED);
                      break;

                    case ']':
                    case '}':
                      // nest dropping to 0 (or below, for garbage input)
                      // means a complete value has been seen
                      if (--self->incr_nest <= 0)
                        goto interrupt;
                  }
              }
        }

    modechange:
      ;
    }

  interrupt:
    self->incr_pos = p - SvPVX (self->incr_text);
    //printf ("return pos %d mode %d nest %d\n", self->incr_pos, self->incr_mode, self->incr_nest);//D
}
1624 | ||
1625 | ///////////////////////////////////////////////////////////////////////////// | |
1425 | 1626 | // XS interface functions |
1426 | 1627 | |
1427 | 1628 | MODULE = JSON::XS PACKAGE = JSON::XS |
1440 | 1641 | json_stash = gv_stashpv ("JSON::XS" , 1); |
1441 | 1642 | json_boolean_stash = gv_stashpv ("JSON::XS::Boolean", 1); |
1442 | 1643 | |
1443 | json_true = get_sv ("JSON::XS::true" , 1); SvREADONLY_on (json_true ); | |
1444 | json_false = get_sv ("JSON::XS::false", 1); SvREADONLY_on (json_false); | |
1644 | json_true = get_bool ("JSON::XS::true"); | |
1645 | json_false = get_bool ("JSON::XS::false"); | |
1445 | 1646 | } |
1446 | 1647 | |
1447 | 1648 | PROTOTYPES: DISABLE |
1456 | 1657 | { |
1457 | 1658 | SV *pv = NEWSV (0, sizeof (JSON)); |
1458 | 1659 | SvPOK_only (pv); |
1459 | Zero (SvPVX (pv), 1, JSON); | |
1460 | ((JSON *)SvPVX (pv))->flags = F_DEFAULT; | |
1660 | json_init ((JSON *)SvPVX (pv)); | |
1461 | 1661 | XPUSHs (sv_2mortal (sv_bless ( |
1462 | 1662 | newRV_noinc (pv), |
1463 | 1663 | strEQ (klass, "JSON::XS") ? JSON_STASH : gv_stashpv (klass, 1) |
1479 | 1679 | allow_blessed = F_ALLOW_BLESSED |
1480 | 1680 | convert_blessed = F_CONV_BLESSED |
1481 | 1681 | relaxed = F_RELAXED |
1682 | allow_unknown = F_ALLOW_UNKNOWN | |
1482 | 1683 | PPCODE: |
1483 | 1684 | { |
1484 | 1685 | if (enable) |
1503 | 1704 | get_allow_blessed = F_ALLOW_BLESSED |
1504 | 1705 | get_convert_blessed = F_CONV_BLESSED |
1505 | 1706 | get_relaxed = F_RELAXED |
1707 | get_allow_unknown = F_ALLOW_UNKNOWN | |
1506 | 1708 | PPCODE: |
1507 | 1709 | XPUSHs (boolSV (self->flags & ix)); |
1508 | 1710 | |
1509 | void max_depth (JSON *self, UV max_depth = 0x80000000UL) | |
1711 | void max_depth (JSON *self, U32 max_depth = 0x80000000UL) | |
1510 | 1712 | PPCODE: |
1511 | { | |
1512 | UV log2 = 0; | |
1513 | ||
1514 | if (max_depth > 0x80000000UL) max_depth = 0x80000000UL; | |
1515 | ||
1516 | while ((1UL << log2) < max_depth) | |
1517 | ++log2; | |
1518 | ||
1519 | self->flags = self->flags & ~F_MAXDEPTH | (log2 << S_MAXDEPTH); | |
1520 | ||
1713 | self->max_depth = max_depth; | |
1521 | 1714 | XPUSHs (ST (0)); |
1522 | } | |
1523 | 1715 | |
1524 | 1716 | U32 get_max_depth (JSON *self) |
1525 | 1717 | CODE: |
1526 | RETVAL = DEC_DEPTH (self->flags); | |
1718 | RETVAL = self->max_depth; | |
1527 | 1719 | OUTPUT: |
1528 | 1720 | RETVAL |
1529 | 1721 | |
1530 | void max_size (JSON *self, UV max_size = 0) | |
1722 | void max_size (JSON *self, U32 max_size = 0) | |
1531 | 1723 | PPCODE: |
1532 | { | |
1533 | UV log2 = 0; | |
1534 | ||
1535 | if (max_size > 0x80000000UL) max_size = 0x80000000UL; | |
1536 | if (max_size == 1) max_size = 2; | |
1537 | ||
1538 | while ((1UL << log2) < max_size) | |
1539 | ++log2; | |
1540 | ||
1541 | self->flags = self->flags & ~F_MAXSIZE | (log2 << S_MAXSIZE); | |
1542 | ||
1724 | self->max_size = max_size; | |
1543 | 1725 | XPUSHs (ST (0)); |
1544 | } | |
1545 | 1726 | |
1546 | 1727 | int get_max_size (JSON *self) |
1547 | 1728 | CODE: |
1548 | RETVAL = DEC_SIZE (self->flags); | |
1729 | RETVAL = self->max_size; | |
1549 | 1730 | OUTPUT: |
1550 | 1731 | RETVAL |
1551 | 1732 | |
1591 | 1772 | void decode_prefix (JSON *self, SV *jsonstr) |
1592 | 1773 | PPCODE: |
1593 | 1774 | { |
1594 | UV offset; | |
1775 | STRLEN offset; | |
1595 | 1776 | EXTEND (SP, 2); |
1596 | 1777 | PUSHs (decode_json (jsonstr, self, &offset)); |
1597 | 1778 | PUSHs (sv_2mortal (newSVuv (offset))); |
1779 | } | |
1780 | ||
1781 | void incr_parse (JSON *self, SV *jsonstr = 0) | |
1782 | PPCODE: | |
1783 | { | |
1784 | if (!self->incr_text) | |
1785 | self->incr_text = newSVpvn ("", 0); | |
1786 | ||
1787 | // append data, if any | |
1788 | if (jsonstr) | |
1789 | { | |
1790 | if (SvUTF8 (jsonstr) && !SvUTF8 (self->incr_text)) | |
1791 | { | |
1792 | /* utf-8-ness differs, need to upgrade */ | |
1793 | sv_utf8_upgrade (self->incr_text); | |
1794 | ||
1795 | if (self->incr_pos) | |
1796 | self->incr_pos = utf8_hop ((U8 *)SvPVX (self->incr_text), self->incr_pos) | |
1797 | - (U8 *)SvPVX (self->incr_text); | |
1798 | } | |
1799 | ||
1800 | { | |
1801 | STRLEN len; | |
1802 | const char *str = SvPV (jsonstr, len); | |
1803 | SvGROW (self->incr_text, SvCUR (self->incr_text) + len + 1); | |
1804 | Move (str, SvEND (self->incr_text), len, char); | |
1805 | SvCUR_set (self->incr_text, SvCUR (self->incr_text) + len); | |
1806 | *SvEND (self->incr_text) = 0; // this should basically be a nop, too, but make sure it's there | |
1807 | } | |
1808 | } | |
1809 | ||
1810 | if (GIMME_V != G_VOID) | |
1811 | do | |
1812 | { | |
1813 | STRLEN offset; | |
1814 | ||
1815 | if (!INCR_DONE (self)) | |
1816 | { | |
1817 | incr_parse (self); | |
1818 | ||
1819 | if (self->incr_pos > self->max_size && self->max_size) | |
1820 | croak ("attempted decode of JSON text of %lu bytes size, but max_size is set to %lu", | |
1821 | (unsigned long)self->incr_pos, (unsigned long)self->max_size); | |
1822 | ||
1823 | if (!INCR_DONE (self)) | |
1824 | break; | |
1825 | } | |
1826 | ||
1827 | XPUSHs (decode_json (self->incr_text, self, &offset)); | |
1828 | ||
1829 | sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + offset); | |
1830 | self->incr_pos -= offset; | |
1831 | self->incr_nest = 0; | |
1832 | self->incr_mode = 0; | |
1833 | } | |
1834 | while (GIMME_V == G_ARRAY); | |
1835 | } | |
1836 | ||
1837 | SV *incr_text (JSON *self) | |
1838 | ATTRS: lvalue | |
1839 | CODE: | |
1840 | { | |
1841 | if (self->incr_pos) | |
1842 | croak ("incr_text can not be called when the incremental parser already started parsing"); | |
1843 | ||
1844 | RETVAL = self->incr_text ? SvREFCNT_inc (self->incr_text) : &PL_sv_undef; | |
1845 | } | |
1846 | OUTPUT: | |
1847 | RETVAL | |
1848 | ||
1849 | void incr_skip (JSON *self) | |
1850 | CODE: | |
1851 | { | |
1852 | if (self->incr_pos) | |
1853 | { | |
1854 | sv_chop (self->incr_text, SvPV_nolen (self->incr_text) + self->incr_pos); | |
1855 | self->incr_pos = 0; | |
1856 | self->incr_nest = 0; | |
1857 | self->incr_mode = 0; | |
1858 | } | |
1859 | } | |
1860 | ||
1861 | void incr_reset (JSON *self) | |
1862 | CODE: | |
1863 | { | |
1864 | SvREFCNT_dec (self->incr_text); | |
1865 | self->incr_text = 0; | |
1866 | self->incr_pos = 0; | |
1867 | self->incr_nest = 0; | |
1868 | self->incr_mode = 0; | |
1598 | 1869 | } |
1599 | 1870 | |
1600 | 1871 | void DESTROY (JSON *self) |
1601 | 1872 | CODE: |
1602 | 1873 | SvREFCNT_dec (self->cb_sk_object); |
1603 | 1874 | SvREFCNT_dec (self->cb_object); |
1875 | SvREFCNT_dec (self->incr_text); | |
1604 | 1876 | |
1605 | 1877 | PROTOTYPES: ENABLE |
1606 | 1878 | |
1607 | 1879 | void encode_json (SV *scalar) |
1880 | ALIAS: | |
1881 | to_json_ = 0 | |
1882 | encode_json = F_UTF8 | |
1608 | 1883 | PPCODE: |
1609 | 1884 | { |
1610 | JSON json = { F_DEFAULT | F_UTF8 }; | |
1885 | JSON json; | |
1886 | json_init (&json); | |
1887 | json.flags |= ix; | |
1611 | 1888 | XPUSHs (encode_json (scalar, &json)); |
1612 | 1889 | } |
1613 | 1890 | |
1614 | 1891 | void decode_json (SV *jsonstr) |
1892 | ALIAS: | |
1893 | from_json_ = 0 | |
1894 | decode_json = F_UTF8 | |
1615 | 1895 | PPCODE: |
1616 | 1896 | { |
1617 | JSON json = { F_DEFAULT | F_UTF8 }; | |
1897 | JSON json; | |
1898 | json_init (&json); | |
1899 | json.flags |= ix; | |
1618 | 1900 | XPUSHs (decode_json (jsonstr, &json, 0)); |
1619 | 1901 | } |
1620 | 1902 | |
1903 |
0 | #!/opt/bin/perl | |
1 | ||
2 | =head1 NAME | |
3 | ||
4 | json_xs - JSON::XS commandline utility | |
5 | ||
6 | =head1 SYNOPSIS | |
7 | ||
8 | json_xs [-v] [-f inputformat] [-t outputformat] | |
9 | ||
10 | =head1 DESCRIPTION | |
11 | ||
12 | F<json_xs> converts between some input and output formats (one of them is | |
13 | JSON). | |
14 | ||
15 | The default input format is C<json> and the default output format is | |
16 | C<json-pretty>. | |
17 | ||
18 | =head1 OPTIONS | |
19 | ||
20 | =over 4 | |
21 | ||
22 | =item -v | |
23 | ||
24 | Be slightly more verbose. | |
25 | ||
26 | =item -f fromformat | |
27 | ||
28 | Read a file in the given format from STDIN. | |
29 | ||
30 | C<fromformat> can be one of: | |
31 | ||
32 | =over 4 | |
33 | ||
34 | =item json - a json text encoded, either utf-8, utf16-be/le, utf32-be/le | |
35 | ||
36 | =item storable - a Storable frozen value | |
37 | ||
38 | =item storable-file - a Storable file (Storable has two incompatible formats) | |
39 | ||
40 | =item clzf - Compress::LZF format (requires that module to be installed) | |
41 | ||
42 | =item yaml - YAML (avoid at all costs, requires the YAML module :) | |
43 | ||
44 | =back | |
45 | ||
46 | =item -t toformat | |
47 | ||
48 | Write the file in the given format to STDOUT. | |
49 | ||
50 | C<toformat> can be one of: |
51 | ||
52 | =over 4 | |
53 | ||
54 | =item json, json-utf-8 - json, utf-8 encoded | |
55 | ||
56 | =item json-pretty - as above, but pretty-printed | |
57 | ||
58 | =item json-utf-16le, json-utf-16be - little endian/big endian utf-16 | |
59 | ||
60 | =item json-utf-32le, json-utf-32be - little endian/big endian utf-32 | |
61 | ||
62 | =item storable - a Storable frozen value in network format | |
63 | ||
64 | =item storable-file - a Storable file in network format (Storable has two incompatible formats) | |
65 | ||
66 | =item clzf - Compress::LZF format | |
67 | ||
68 | =item yaml - YAML | |
69 | ||
70 | =back | |
71 | ||
72 | =back | |
73 | ||
74 | =head1 EXAMPLES | |
75 | ||
76 | json_xs -t null <isitreally.json | |
77 | ||
78 | "JSON Lint" - tries to parse the file F<isitreally.json> as JSON - if it | |
79 | is valid JSON, the command outputs nothing, otherwise it will print an | |
80 | error message and exit with non-zero exit status. | |
81 | ||
82 | <src.json json_xs >pretty.json | |
83 | ||
84 | Prettify the JSON file F<src.json> to F<pretty.json>. |
85 | ||
86 | json_xs -f storable-file <file | |
87 | ||
88 | Read the serialised Storable file F<file> and print a human-readable JSON | |
89 | version of it to STDOUT. | |
90 | ||
91 | json_xs -f storable-file -t yaml <file | |
92 | ||
93 | Same as above, but write YAML instead (not using JSON at all :) | |
94 | ||
95 | lwp-request http://cpantesters.perl.org/show/JSON-XS.json | json_xs | |
96 | ||
97 | Fetch the cpan-testers result summary C<JSON::XS> and pretty-print it. | |
98 | ||
99 | =head1 AUTHOR | |
100 | ||
101 | Copyright (C) 2008 Marc Lehmann <json@schmorp.de> | |
102 | ||
103 | =cut | |
104 | ||
105 | use strict; | |
106 | ||
107 | use Getopt::Long; | |
108 | use Storable (); | |
109 | use Encode; | |
110 | ||
111 | use JSON::XS; | |
112 | ||
113 | my $opt_verbose; | |
114 | my $opt_from = "json"; | |
115 | my $opt_to = "json-pretty"; | |
116 | ||
117 | Getopt::Long::Configure ("bundling", "no_ignore_case", "require_order"); | |
118 | ||
119 | GetOptions( | |
120 | "v" => \$opt_verbose, | |
121 | "f=s" => \$opt_from, | |
122 | "t=s" => \$opt_to, | |
123 | ) or die "Usage: $0 [-v] -f fromformat [-t toformat]\n"; | |
124 | ||
# Input decoders: each sub reads the raw input from $_ and returns the
# deserialised perl data structure.
my %F = (
   # guess the (unmarked) unicode encoding from the NUL-byte pattern of
   # the leading bytes, as suggested by RFC 4627, section 3
   "json" => sub {
      my $enc =
         /^\x00\x00\x00/s ? "utf-32be"
         : /^\x00.\x00/s ? "utf-16be"
         : /^.\x00\x00\x00/s ? "utf-32le"
         : /^.\x00.\x00/s ? "utf-16le"
         : "utf-8";
      warn "input text encoding is $enc\n" if $opt_verbose;
      JSON::XS->new->decode (decode $enc, $_)
   },
   "storable" => sub { Storable::thaw $_ },
   # in-memory filehandle over the slurped bytes
   "storable-file" => sub { open my $fh, "<", \$_; Storable::fd_retrieve $fh },
   "clzf" => sub { require Compress::LZF; Compress::LZF::sthaw ($_) },
   "yaml" => sub { require YAML; YAML::Load ($_) },
);
141 | ||
# Output encoders: each sub takes the perl data structure in $_ and
# returns the serialised byte string to print.
my %T = (
   "null" => sub { "" }, # discard output - useful for pure validation ("JSON lint")
   "json" => sub { encode_json $_ },
   "json-utf-8" => sub { encode_json $_ },
   "json-pretty" => sub { JSON::XS->new->utf8->pretty->encode ($_) },
   # encode to perl characters first, then transcode to the requested UTF flavour
   "json-utf-16le" => sub { encode "utf-16le", JSON::XS->new->encode ($_) },
   "json-utf-16be" => sub { encode "utf-16be", JSON::XS->new->encode ($_) },
   "json-utf-32le" => sub { encode "utf-32le", JSON::XS->new->encode ($_) },
   "json-utf-32be" => sub { encode "utf-32be", JSON::XS->new->encode ($_) },

   "storable" => sub { Storable::nfreeze $_ }, # network (portable) format
   "storable-file" => sub { open my $fh, ">", \my $buf; Storable::nstore_fd $_, $fh; $buf },

   "clzf" => sub { require Compress::LZF; Compress::LZF::sfreeze_cr ($_) },
   "yaml" => sub { require YAML; YAML::Dump ($_) },
);
158 | ||
159 | $F{$opt_from} | |
160 | or die "$opt_from: not a valid fromformat\n"; | |
161 | ||
162 | $T{$opt_to} | |
163 | or die "$opt_from: not a valid toformat\n"; | |
164 | ||
165 | { | |
166 | local $/; | |
167 | binmode STDIN; # stupid perl sometimes thinks its funny | |
168 | $_ = <STDIN>; | |
169 | } | |
170 | ||
171 | $_ = $F{$opt_from}->(); | |
172 | $_ = $T{$opt_to}->(); | |
173 | ||
174 | binmode STDOUT; | |
175 | print $_; | |
176 | ||
177 | ||
178 |
1 | 1 | |
2 | 2 | # Usage: bench json-file |
3 | 3 | |
4 | # which modules to test (JSON usually excluded because its so slow) | |
4 | # which modules to test (JSON::PP usually excluded because its so slow) | |
5 | 5 | my %tst = ( |
6 | # "JSON" => ['JSON::objToJson $perl' , 'JSON::jsonToObj $json'], | |
7 | # "JSON::PP" => ['$pp->encode ($perl)' , '$pp->decode ($json)'], | |
8 | "JSON::DWIW" => ['$dwiw->to_json ($perl)' , '$dwiw->from_json ($json)'], | |
9 | # "JSON::PC" => ['$pc->convert ($perl)' , '$pc->parse ($json)'], | |
10 | "JSON::Syck" => ['JSON::Syck::Dump $perl' , 'JSON::Syck::Load $json'], | |
11 | "JSON::XS" => ['to_json $perl' , 'from_json $json'], | |
12 | "JSON::XS/2" => ['$xs2->encode ($perl)' , '$xs2->decode ($json)'], | |
13 | "JSON::XS/3" => ['$xs3->encode ($perl)' , '$xs3->decode ($json)'], | |
14 | "Storable" => ['Storable::nfreeze $perl' , 'Storable::thaw $pst'], | |
6 | # "JSON" => ['JSON::encode_json $perl' , 'JSON::decode_json $json'], | |
7 | # "JSON::PP" => ['$pp->encode ($perl)' , '$pp->decode ($json)'], | |
8 | "JSON::DWIW/FJ" => ['$dwiw->to_json ($perl)' , '$dwiw->from_json ($json)'], | |
9 | "JSON::DWIW/DS" => ['$dwiw->to_json ($perl)' , 'JSON::DWIW::deserialize $json'], | |
10 | "JSON::PC" => ['$pc->convert ($perl)' , '$pc->parse ($json)'], | |
11 | "JSON::Syck" => ['JSON::Syck::Dump $perl' , 'JSON::Syck::Load $json'], | |
12 | "JSON::XS" => ['encode_json $perl' , 'decode_json $json'], | |
13 | "JSON::XS/2" => ['$xs2->encode ($perl)' , '$xs2->decode ($json)'], | |
14 | "JSON::XS/3" => ['$xs3->encode ($perl)' , '$xs3->decode ($json)'], | |
15 | "Storable" => ['Storable::nfreeze $perl' , 'Storable::thaw $pst'], | |
15 | 16 | ); |
16 | 17 | |
17 | 18 | use JSON (); |
18 | 19 | use JSON::DWIW; |
19 | 20 | use JSON::PC; |
20 | 21 | use JSON::PP (); |
21 | use JSON::XS qw(to_json from_json); | |
22 | use JSON::XS qw(encode_json decode_json); | |
22 | 23 | use JSON::Syck; |
23 | 24 | use Storable (); |
24 | 25 | |
39 | 40 | $json = <>; |
40 | 41 | |
41 | 42 | # fix syck-brokenised stuff |
42 | $json = JSON::XS->new->ascii(1)->encode (JSON::Syck::Load $json); | |
43 | #$json = JSON::XS->new->ascii(1)->encode (JSON::Syck::Load $json); | |
43 | 44 | |
44 | 45 | #srand 0; $json = JSON::XS->new->utf8(1)->ascii(0)->encode ([join "", map +(chr rand 255), 0..2047]); |
45 | 46 | |
56 | 57 | sub bench($) { |
57 | 58 | my ($code) = @_; |
58 | 59 | |
59 | my $perl = JSON::XS::from_json $json; | |
60 | my $pst = Storable::nfreeze $perl; | |
60 | my $pst = Storable::nfreeze JSON::XS::decode_json $json; # seperately decode as storable stringifies :/ | |
61 | my $perl = JSON::XS::decode_json $json; | |
61 | 62 | |
62 | 63 | my $count = 5; |
63 | 64 | my $times = 200; |
76 | 77 | return $count / $min; |
77 | 78 | } |
78 | 79 | |
79 | printf "%-10s | %10s | %10s |\n", "module", "encode", "decode"; | |
80 | printf "-----------|------------|------------|\n"; | |
80 | printf "%-13s | %10s | %10s |\n", "module", "encode", "decode"; | |
81 | printf "--------------|------------|------------|\n"; | |
81 | 82 | for my $module (sort keys %tst) { |
82 | 83 | my $enc = bench $tst{$module}[0]; |
83 | 84 | my $dec = bench $tst{$module}[1]; |
84 | 85 | |
85 | printf "%-10s | %10.3f | %10.3f |\n", $module, $enc, $dec; | |
86 | printf "%-13s | %10.3f | %10.3f |\n", $module, $enc, $dec; | |
86 | 87 | } |
87 | printf "-----------+------------+------------+\n"; | |
88 | printf "--------------+------------+------------+\n"; | |
88 | 89 |
23 | 23 | ok ($js->max_depth(2)->encode ([{}])); |
24 | 24 | ok (!eval { $js->encode ([[{}]]), 1 }); |
25 | 25 | |
26 | ok (eval { ref $js->max_size (7)->decode ("[ ]") }); | |
27 | eval { $js->max_size (7)->decode ("[ ]") }; ok ($@ =~ /max_size/); | |
26 | ok (eval { ref $js->max_size (8)->decode ("[ ]") }); | |
27 | eval { $js->max_size (8)->decode ("[ ]") }; ok ($@ =~ /max_size/); | |
28 | 28 |
0 | #! perl | |
1 | ||
2 | use strict; | |
3 | no warnings; | |
4 | use Test::More; | |
5 | BEGIN { plan tests => 697 }; | |
6 | ||
7 | use JSON::XS; | |
8 | ||
# Exercise the incremental parser: feed $text to $coder in two pieces,
# split at every possible byte offset, and verify that (a) a document is
# produced, (b) it round-trips to the same encoding as a one-shot decode
# of the full text, and (c) only whitespace remains in the incr buffer.
sub splitter {
   my ($coder, $text) = @_;

   for my $pos (0 .. length $text) {
      my $head = substr $text, 0, $pos;
      my $tail = substr $text, $pos;

      $coder->incr_parse ($head);
      $coder->incr_parse ($tail);

      my $data = $coder->incr_parse;
      ok ($data);
      ok ($coder->encode ($data) eq $coder->encode ($coder->decode ($text)), "data");
      ok ($coder->incr_text =~ /^\s*$/, "tailws");
   }
}
25 | ||
# Split-at-every-offset round trips for complete documents, with and
# without allow_nonref.
splitter +JSON::XS->new , ' ["x\\"","\\u1000\\\\n\\nx",1,{"\\\\" :5 , "": "x"}]';
splitter +JSON::XS->new , '[ "x\\"","\\u1000\\\\n\\nx" , 1,{"\\\\ " :5 , "": " x"} ] ';
splitter +JSON::XS->new->allow_nonref, '"test"';
splitter +JSON::XS->new->allow_nonref, ' "5" ';

# Several comma-separated documents in one buffer: incr_parse must hand
# them back one at a time, leaving the separator in incr_text for the
# caller to strip.
{
   my $text = '[5],{"":1} , [ 1,2, 3], {"3":null}';
   my $coder = JSON::XS->new;
   for my $pos (0 .. length $text) {
      my $head = substr $text, 0, $pos;
      my $tail = substr $text, $pos;

      $coder->incr_parse ($head);
      $coder->incr_parse ($tail);

      my $j1 = $coder->incr_parse; ok ($coder->incr_text =~ s/^\s*,//, "cskip1");
      my $j2 = $coder->incr_parse; ok ($coder->incr_text =~ s/^\s*,//, "cskip2");
      my $j3 = $coder->incr_parse; ok ($coder->incr_text =~ s/^\s*,//, "cskip3");
      my $j4 = $coder->incr_parse; ok ($coder->incr_text !~ s/^\s*,//, "cskip4");
      my $j5 = $coder->incr_parse; ok ($coder->incr_text !~ s/^\s*,//, "cskip5");

      ok ('[5]' eq encode_json $j1, "cjson1");
      ok ('{"":1}' eq encode_json $j2, "cjson2");
      ok ('[1,2,3]' eq encode_json $j3, "cjson3");
      ok ('{"3":null}' eq encode_json $j4, "cjson4");
      ok (!defined $j5, "cjson5");
   }
}

# A syntax error poisons the parser until incr_skip discards the bad
# document; afterwards the next document parses normally.
{
   my $text = '[x][5]';
   my $coder = JSON::XS->new;
   $coder->incr_parse ($text);
   ok (!eval { $coder->incr_parse }, "sparse1");
   ok (!eval { $coder->incr_parse }, "sparse2");
   $coder->incr_skip;
   ok ('[5]' eq $coder->encode (scalar $coder->incr_parse), "sparse3");
}

# max_size applies to the accumulated incremental buffer.
{
   my $coder = JSON::XS->new->max_size (5);
   ok (!$coder->incr_parse ("[ "), "incsize1");
   eval { !$coder->incr_parse ("] ") }; ok ($@ =~ /6 bytes/, "incsize2 $@");
}

# max_depth is enforced during incremental parsing as well.
{
   my $coder = JSON::XS->new->max_depth (3);
   ok (!$coder->incr_parse ("[[["), "incdepth1");
   eval { !$coder->incr_parse (" [] ") }; ok ($@ =~ /maximum nesting/, "incdepth2 $@");
}

# contributed by yuval kogman, reformatted to fit style
{
   my $coder = JSON::XS->new;

   my $ret = eval { $coder->incr_parse("]") };
   my $err = $@; # test more clobbers $@, we need it twice

   ok (!$ret, "unbalanced bracket");
   ok ($err, "got error");
   like ($err, qr/malformed/, "malformed json string error");

   $coder->incr_skip;

   is_deeply (eval { $coder->incr_parse("[42]") }, [42], "valid data after incr_skip");
}
92 | ||
93 |