Import upstream version 1.01+git20200218.1.83c5beb
Debian Janitor
2 years ago
0 | 0 | |
1 | 1 | === version history for XML::LibXML::Simple |
2 | ||
3 | version 1.02: | |
4 | ||
5 | Improvements: | |
6 | - document difference between XML::Simple and XML::LibXML::Simple | |
7 | on the return of an element which only contains white space. | |
8 | github#1 [tony-o] | |
2 | 9 | |
3 | 10 | version 1.01: Wed 15 Jan 2020 10:47:10 PM CET |
4 | 11 |
0 | 0 | ChangeLog |
1 | 1 | MANIFEST |
2 | 2 | Makefile.PL |
3 | README | |
4 | 3 | README.md |
5 | 4 | lib/XML/LibXML/Simple.pm |
6 | lib/XML/LibXML/Simple.pod | |
7 | 5 | t/01use.t |
8 | 6 | t/10XMLin.t |
9 | 7 | t/10XMLin.xml |
3 | 3 | "Mark Overmeer <markov@cpan.org>" |
4 | 4 | ], |
5 | 5 | "dynamic_config" : 1, |
6 | "generated_by" : "ExtUtils::MakeMaker version 7.3, CPAN::Meta::Converter version 2.150010", | |
6 | "generated_by" : "ExtUtils::MakeMaker version 7.62, CPAN::Meta::Converter version 2.150010", | |
7 | 7 | "license" : [ |
8 | 8 | "perl_5" |
9 | 9 | ], |
49 | 49 | "web" : "https://github.com/markov2/perl5-XML-LibXML-Simple" |
50 | 50 | } |
51 | 51 | }, |
52 | "version" : "1.01", | |
53 | "x_serialization_backend" : "JSON::PP version 2.94" | |
52 | "version" : "1.02", | |
53 | "x_serialization_backend" : "JSON::PP version 4.06" | |
54 | 54 | } |
6 | 6 | configure_requires: |
7 | 7 | ExtUtils::MakeMaker: '0' |
8 | 8 | dynamic_config: 1 |
9 | generated_by: 'ExtUtils::MakeMaker version 7.3, CPAN::Meta::Converter version 2.150010' | |
9 | generated_by: 'ExtUtils::MakeMaker version 7.62, CPAN::Meta::Converter version 2.150010' | |
10 | 10 | license: perl |
11 | 11 | meta-spec: |
12 | 12 | url: http://module-build.sourceforge.net/META-spec-v1.4.html |
24 | 24 | homepage: http://perl.overmeer.net/CPAN/ |
25 | 25 | license: http://dev.perl.org/licenses/ |
26 | 26 | repository: https://github.com/markov2/perl5-XML-LibXML-Simple.git |
27 | version: '1.01' | |
28 | x_serialization_backend: 'CPAN::Meta::YAML version 0.011' | |
27 | version: '1.02' | |
28 | x_serialization_backend: 'CPAN::Meta::YAML version 0.018' |
5 | 5 | |
6 | 6 | WriteMakefile |
7 | 7 | ( NAME => 'XML::LibXML::Simple' |
8 | , VERSION => '1.01' | |
8 | , VERSION => '1.02' | |
9 | 9 | , PREREQ_PM => |
10 | 10 | { XML::LibXML => 1.64 |
11 | 11 | , Test::More => 0.54 |
0 | === README for XML-LibXML-Simple version 1.00 | |
1 | = Generated on Wed Jan 15 11:56:02 2020 by OODoc 2.02 | |
2 | ||
3 | There are various ways to install this module: | |
4 | ||
5 | (1) if you have a command-line, you can do: | |
6 | perl -MCPAN -e 'install <any package from this distribution>' | |
7 | ||
8 | (2) if you use Windows, have a look at http://ppm.activestate.com/ | |
9 | ||
10 | (3) if you have downloaded this module manually (as root/administrator) | |
11 | gzip -d XML-LibXML-Simple-1.00.tar.gz | |
12 | tar -xf XML-LibXML-Simple-1.00.tar | |
13 | cd XML-LibXML-Simple-1.00 | |
14 | perl Makefile.PL | |
15 | make # optional | |
16 | make test # optional | |
17 | make install | |
18 | ||
19 | For usage, see the included manual-pages or | |
20 | http://search.cpan.org/dist/XML-LibXML-Simple-1.00/ | |
21 | ||
22 | Please report problems to | |
23 | http://rt.cpan.org/Dist/Display.html?Queue=XML-LibXML-Simple | |
24 |
0 | # Copyrights 2008-2020 by [Mark Overmeer <markov@cpan.org>]. | |
1 | # For other contributors see ChangeLog. | |
2 | # See the manual pages for details on the licensing terms. | |
3 | # Pod stripped from pm file by OODoc 2.02. | |
4 | 0 | # This code is part of distribution XML-LibXML-Simple. Meta-POD processed |
5 | 1 | # with OODoc into POD and HTML manual-pages. See README.md |
6 | 2 | # Copyright Mark Overmeer. Licensed under the same terms as Perl itself. |
7 | 3 | |
8 | 4 | package XML::LibXML::Simple; |
9 | use vars '$VERSION'; | |
10 | $VERSION = '1.01'; | |
11 | ||
12 | 5 | use base 'Exporter'; |
13 | 6 | |
14 | 7 | use strict; |
25 | 18 | |
26 | 19 | use Data::Dumper; #to be removed |
27 | 20 | |
21 | =chapter NAME | |
22 | ||
23 | XML::LibXML::Simple - XML::LibXML clone of XML::Simple::XMLin() | |
24 | ||
25 | =chapter SYNOPSIS | |
26 | ||
27 | my $xml = ...; # filename, fh, string, or XML::LibXML-node | |
28 | ||
29 | Imperative: | |
30 | ||
31 | use XML::LibXML::Simple qw(XMLin); | |
32 | my $data = XMLin $xml, %options; | |
33 | ||
34 | Or the Object Oriented way: | |
35 | ||
36 | use XML::LibXML::Simple (); | |
37 | my $xs = XML::LibXML::Simple->new(%options); | |
38 | my $data = $xs->XMLin($xml, %options); | |
39 | ||
40 | =chapter DESCRIPTION | |
41 | ||
42 | This module is a blunt rewrite of M<XML::Simple> (by Grant McLean) to | |
43 | use the M<XML::LibXML> parser for XML structures, where the original | |
44 | uses plain Perl or SAX parsers. | |
45 | ||
46 | B<Be warned:> this module tries to be smart. You may very well shoot | |
47 | yourself in the foot with this DWIMmery. Read the whole manual page | |
48 | at least once before you start using it. When your XML is described in | |
49 | a schema or WSDL, then use M<XML::Compile> for maintainable code. | |
50 | ||
51 | =chapter METHODS | |
52 | ||
53 | =cut | |
28 | 54 | |
29 | 55 | my %known_opts = map +($_ => 1), |
30 | 56 | qw(keyattr keeproot forcecontent contentkey noattr searchpath |
35 | 61 | my $default_content_key = 'content'; |
36 | 62 | |
37 | 63 | #------------- |
64 | =section Constructors | |
65 | ||
66 | =c_method new %options | |
67 | Instantiate an object, which can be used to call M<XMLin()> on. You can | |
68 | provide %options to this constructor (to be reused for each call to XMLin) | |
69 | and with each call of XMLin (to be used once) | |
70 | ||
71 | For descriptions of the %options see the L</DETAILS> | |
72 | section of this manual page. | |
73 | ||
74 | =cut | |
38 | 75 | |
39 | 76 | sub new(@) |
40 | 77 | { my $class = shift; |
49 | 86 | } |
50 | 87 | |
51 | 88 | #------------- |
89 | =section Translators | |
90 | ||
91 | =method XMLin $xmldata, %options | |
92 | For $xmldata and descriptions of the %options see the L</DETAILS> | |
93 | section of this manual page. | |
94 | ||
95 | =cut | |
52 | 96 | |
53 | 97 | sub XMLin |
54 | 98 | { my $self = @_ > 1 && blessed $_[0] && $_[0]->isa(__PACKAGE__) ? shift |
510 | 554 | |
511 | 555 | __END__ |
512 | 556 | |
557 | =chapter FUNCTIONS | |
558 | ||
559 | The functions C<XMLin> (exported implicitly) and C<xml_in> | |
560 | (exported on request) simply call C<<XML::LibXML::Simple->new->XMLin() >> | |
561 | with the provided parameters. | |
562 | ||
563 | =chapter DETAILS | |
564 | ||
565 | =section Parameter $xmldata | |
566 | ||
567 | As first parameter to M<XMLin()> you must provide the XML message to be | |
568 | translated into a Perl structure. Choose one of the following: | |
569 | ||
570 | =over 4 | |
571 | ||
572 | =item A filename | |
573 | ||
574 | If the filename contains no directory components, C<XMLin()> will look for the | |
575 | file in each directory in the SearchPath (see OPTIONS below) and in the | |
576 | current directory. eg: | |
577 | ||
578 | $data = XMLin('/etc/params.xml', %options); | |
579 | ||
580 | =item A dash (-) | |
581 | ||
582 | Parse from STDIN. | |
583 | ||
584 | $data = XMLin('-', %options); | |
585 | ||
586 | =item undef | |
587 | ||
588 | [deprecated] | |
589 | If there is no XML specifier, C<XMLin()> will check the script directory and | |
590 | each of the SearchPath directories for a file with the same name as the script | |
591 | but with the extension '.xml'. Note: if you wish to specify options, you | |
592 | must specify the value 'undef'. eg: | |
593 | ||
594 | $data = XMLin(undef, ForceArray => 1); | |
595 | ||
596 | This feature is available for backwards compatibility with M<XML::Simple>, | |
597 | but quite sensitive. You can easily hit the wrong xml file as input. | |
598 | Please do not use it: always use an explicit filename. | |
599 | ||
600 | =item A string of XML | |
601 | ||
602 | A string containing XML (recognised by the presence of '<' and '>' characters) | |
603 | will be parsed directly. eg: | |
604 | ||
605 | $data = XMLin('<opt username="bob" password="flurp" />', %options); | |
606 | ||
607 | =item An IO::Handle object | |
608 | ||
609 | In this case, XML::LibXML::Parser will read the XML data directly from | |
610 | the provided file. | |
611 | ||
612 | # $fh = IO::File->new('/etc/params.xml') or die; | |
613 | open my $fh, '<:encoding(utf8)', '/etc/params.xml' or die; | |
614 | ||
615 | $data = XMLin($fh, %options); | |
616 | ||
617 | =item An XML::LibXML::Document or ::Element | |
618 | ||
619 | [Not available in XML::Simple] When you have a pre-parsed XML::LibXML | |
620 | node, you can pass that. | |
621 | ||
622 | =back | |
623 | ||
624 | =section Parameter %options | |
625 | ||
626 | M<XML::LibXML::Simple> supports most options defined by M<XML::Simple>, so | |
627 | the interface is quite compatible. Minor changes apply. This explanation | |
628 | is extracted from the XML::Simple manual-page. | |
629 | ||
630 | =over 4 | |
631 | ||
632 | =item * | |
633 | ||
634 | check out C<ForceArray> because you'll almost certainly want to turn it on | |
635 | ||
636 | =item * | |
637 | ||
638 | make sure you know what the C<KeyAttr> option does and what its default | |
639 | value is because it may surprise you otherwise. | |
640 | ||
641 | =item * | |
642 | ||
643 | Option names are case-insensitive so you can use the mixed case versions | |
644 | shown here; you can add underscores between the words (eg: key_attr) | |
645 | if you like. | |
646 | ||
647 | =back | |
648 | ||
649 | In alphabetic order: | |
650 | ||
651 | =over 4 | |
652 | ||
653 | =item ContentKey => 'keyname' I<# seldom used> | |
654 | ||
655 | When text content is parsed to a hash value, this option lets you specify a | |
656 | name for the hash key to override the default 'content'. So for example: | |
657 | ||
658 | XMLin('<opt one="1">Two</opt>', ContentKey => 'text') | |
659 | ||
660 | will parse to: | |
661 | ||
662 | { one => 1, text => 'Two' } | |
663 | ||
664 | instead of: | |
665 | ||
666 | { one => 1, content => 'Two' } | |
667 | ||
668 | You can also prefix your selected key name with a '-' character to have | |
669 | C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after | |
670 | array folding. For example: | |
671 | ||
672 | XMLin( | |
673 | '<opt><item name="one">First</item><item name="two">Second</item></opt>', | |
674 | KeyAttr => {item => 'name'}, | |
675 | ForceArray => [ 'item' ], | |
676 | ContentKey => '-content' | |
677 | ) | |
678 | ||
679 | will parse to: | |
680 | ||
681 | { | |
682 | item => { | |
683 | one => 'First', | |
684 | two => 'Second' | |
685 | } | |
686 | } | |
687 | ||
688 | rather than this (without the '-'): | |
689 | ||
690 | { | |
691 | item => { | |
692 | one => { content => 'First' }, | |
693 | two => { content => 'Second' } | |
694 | } | |
695 | } | |
696 | ||
697 | =item ForceArray => 1 I<# important> | |
698 | ||
699 | This option should be set to '1' to force nested elements to be represented | |
700 | as arrays even when there is only one. Eg, with ForceArray enabled, this | |
701 | XML: | |
702 | ||
703 | <opt> | |
704 | <name>value</name> | |
705 | </opt> | |
706 | ||
707 | would parse to this: | |
708 | ||
709 | { name => [ 'value' ] } | |
710 | ||
711 | instead of this (the default): | |
712 | ||
713 | { name => 'value' } | |
714 | ||
715 | This option is especially useful if the data structure is likely to be written | |
716 | back out as XML and the default behaviour of rolling single nested elements up | |
717 | into attributes is not desirable. | |
718 | ||
719 | If you are using the array folding feature, you should almost certainly | |
720 | enable this option. If you do not, single nested elements will not be | |
721 | parsed to arrays and therefore will not be candidates for folding to a | |
722 | hash. (Given that the default value of 'KeyAttr' enables array folding, | |
723 | the default value of this option should probably also have been enabled | |
724 | as well). | |
725 | ||
726 | =item ForceArray => [ names ] I<# important> | |
727 | ||
728 | This alternative (and preferred) form of the 'ForceArray' option allows you to | |
729 | specify a list of element names which should always be forced into an array | |
730 | representation, rather than the 'all or nothing' approach above. | |
731 | ||
732 | It is also possible to include compiled regular | |
733 | expressions in the list --any element names which match the pattern | |
734 | will be forced to arrays. If the list contains only a single regex, | |
735 | then it is not necessary to enclose it in an arrayref. Eg: | |
736 | ||
737 | ForceArray => qr/_list$/ | |
738 | ||
739 | =item ForceContent => 1 I<# seldom used> | |
740 | ||
741 | When C<XMLin()> parses elements which have text content as well as attributes, | |
742 | the text content must be represented as a hash value rather than a simple | |
743 | scalar. This option allows you to force text content to always parse to | |
744 | a hash value even when there are no attributes. So for example: | |
745 | ||
746 | XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1) | |
747 | ||
748 | will parse to: | |
749 | ||
750 | { | |
751 | x => { content => 'text1' }, | |
752 | y => { a => 2, content => 'text2' } | |
753 | } | |
754 | ||
755 | instead of: | |
756 | ||
757 | { | |
758 | x => 'text1', | |
759 | y => { 'a' => 2, 'content' => 'text2' } | |
760 | } | |
761 | ||
762 | =item GroupTags => { grouping tag => grouped tag } I<# handy> | |
763 | ||
764 | You can use this option to eliminate extra levels of indirection in your Perl | |
765 | data structure. For example this XML: | |
766 | ||
767 | <opt> | |
768 | <searchpath> | |
769 | <dir>/usr/bin</dir> | |
770 | <dir>/usr/local/bin</dir> | |
771 | <dir>/usr/X11/bin</dir> | |
772 | </searchpath> | |
773 | </opt> | |
774 | ||
775 | Would normally be read into a structure like this: | |
776 | ||
777 | { | |
778 | searchpath => { | |
779 | dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] | |
780 | } | |
781 | } | |
782 | ||
783 | But when read in with the appropriate value for 'GroupTags': | |
784 | ||
785 | my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' }); | |
786 | ||
787 | It will return this simpler structure: | |
788 | ||
789 | { | |
790 | searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] | |
791 | } | |
792 | ||
793 | The grouping element (C<< <searchpath> >> in the example) must not contain any | |
794 | attributes or elements other than the grouped element. | |
795 | ||
796 | You can specify multiple 'grouping element' to 'grouped element' mappings in | |
797 | the same hashref. If this option is combined with C<KeyAttr>, the array | |
798 | folding will occur first and then the grouped element names will be eliminated. | |
799 | ||
800 | =item HookNodes => CODE | |
801 | Select document nodes to apply special tricks. | |
802 | Introduced in [0.96], not available in XML::Simple. | |
803 | ||
804 | When this option is provided, the CODE will be called once the XML DOM | |
805 | tree is ready to get transformed into Perl. Your CODE should return | |
806 | either C<undef> (nothing to do) or a HASH which maps values of | |
807 | unique_key (see M<XML::LibXML::Node> method C<unique_key>) onto CODE | |
808 | references to be called. | |
809 | ||
810 | Once the translator from XML into Perl reaches a selected node, it will | |
811 | call your routine specific for that node. That triggering node found | |
812 | is the only parameter. When you return C<undef>, the node will not be | |
813 | found in the final result. You may return any data (even the node itself) | |
814 | which will be included in the final result as is, under the name of the | |
815 | original node. | |
816 | ||
817 | Example: | |
818 | ||
819 | my $out = XMLin $file, HookNodes => \&protect_html; | |
820 | ||
821 | sub protect_html($$) | |
822 | { # $obj is the instantiated XML::LibXML::Simple object | |
823 | # $xml is a XML::LibXML::Element to get transformed | |
824 | my ($obj, $xml) = @_; | |
825 | ||
826 | my %hook; # collects the table of hooks | |
827 | ||
828 | # do an xpath search for HTML | |
829 | my $xpc = XML::LibXML::XPathContext->new($xml); | |
830 | my @nodes = $xpc->findnodes(...); #XXX | |
831 | @nodes or return undef; | |
832 | ||
833 | my $as_text = sub { $_[0]->toString(0) }; # as text | |
834 | # $as_node = sub { $_[0] }; # as node | |
835 | # $skip = sub { undef }; # not at all | |
836 | ||
837 | # the same behavior for all xpath nodes, in this example | |
838 | $hook{$_->unique_key} = $as_text | |
839 | for @nodes; | |
840 | ||
841 | \%hook; | |
842 | } | |
843 | ||
844 | =item KeepRoot => 1 I<# handy> | |
845 | ||
846 | In its attempt to return a data structure free of superfluous detail and | |
847 | unnecessary levels of indirection, C<XMLin()> normally discards the root | |
848 | element name. Setting the 'KeepRoot' option to '1' will cause the root element | |
849 | name to be retained. So after executing this code: | |
850 | ||
851 | $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1) | |
852 | ||
853 | You'll be able to reference the tempdir as | |
854 | C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default | |
855 | C<$config-E<gt>{tempdir}>. | |
856 | ||
857 | =item KeyAttr => [ list ] I<# important> | |
858 | ||
859 | This option controls the 'array folding' feature which translates nested | |
860 | elements from an array to a hash. It also controls the 'unfolding' of hashes | |
861 | to arrays. | |
862 | ||
863 | For example, this XML: | |
864 | ||
865 | <opt> | |
866 | <user login="grep" fullname="Gary R Epstein" /> | |
867 | <user login="stty" fullname="Simon T Tyson" /> | |
868 | </opt> | |
869 | ||
870 | would, by default, parse to this: | |
871 | ||
872 | { | |
873 | user => [ | |
874 | { login => 'grep', | |
875 | fullname => 'Gary R Epstein' | |
876 | }, | |
877 | { login => 'stty', | |
878 | fullname => 'Simon T Tyson' | |
879 | } | |
880 | ] | |
881 | } | |
882 | ||
883 | If the option 'KeyAttr => "login"' were used to specify that the 'login' | |
884 | attribute is a key, the same XML would parse to: | |
885 | ||
886 | { | |
887 | user => { | |
888 | stty => { fullname => 'Simon T Tyson' }, | |
889 | grep => { fullname => 'Gary R Epstein' } | |
890 | } | |
891 | } | |
892 | ||
893 | The key attribute names should be supplied in an arrayref if there is more | |
894 | than one. C<XMLin()> will attempt to match attribute names in the order | |
895 | supplied. | |
896 | ||
897 | Note 1: The default value for 'KeyAttr' is C<< ['name', 'key', 'id'] >>. | |
898 | If you do not want folding on input or unfolding on output you must | |
899 | set this option to an empty list to disable the feature. | |
900 | ||
901 | Note 2: If you wish to use this option, you should also enable the | |
902 | C<ForceArray> option. Without 'ForceArray', a single nested element will be | |
903 | rolled up into a scalar rather than an array and therefore will not be folded | |
904 | (since only arrays get folded). | |
905 | ||
906 | =item KeyAttr => { list } I<# important> | |
907 | ||
908 | This alternative (and preferred) method of specifying the key attributes | |
909 | allows more fine grained control over which elements are folded and on which | |
910 | attributes. For example the option 'KeyAttr => { package => "id" }' will cause | |
911 | any package elements to be folded on the 'id' attribute. No other elements | |
912 | which have an 'id' attribute will be folded at all. | |
913 | ||
914 | Two further variations are made possible by prefixing a '+' or a '-' character | |
915 | to the attribute name: | |
916 | ||
917 | The option 'KeyAttr => { user => "+login" }' will cause this XML: | |
918 | ||
919 | <opt> | |
920 | <user login="grep" fullname="Gary R Epstein" /> | |
921 | <user login="stty" fullname="Simon T Tyson" /> | |
922 | </opt> | |
923 | ||
924 | to parse to this data structure: | |
925 | ||
926 | { | |
927 | user => { | |
928 | stty => { | |
929 | fullname => 'Simon T Tyson', | |
930 | login => 'stty' | |
931 | }, | |
932 | grep => { | |
933 | fullname => 'Gary R Epstein', | |
934 | login => 'grep' | |
935 | } | |
936 | } | |
937 | } | |
938 | ||
939 | The '+' indicates that the value of the key attribute should be copied | |
940 | rather than moved to the folded hash key. | |
941 | ||
942 | A '-' prefix would produce this result: | |
943 | ||
944 | { | |
945 | user => { | |
946 | stty => { | |
947 | fullname => 'Simon T Tyson', | |
948 | -login => 'stty' | |
949 | }, | |
950 | grep => { | |
951 | fullname => 'Gary R Epstein', | |
952 | -login => 'grep' | |
953 | } | |
954 | } | |
955 | } | |
956 | ||
957 | =item NoAttr => 1 I<# handy> | |
958 | ||
959 | When used with C<XMLin()>, any attributes in the XML will be ignored. | |
960 | ||
961 | =item NormaliseSpace => 0 | 1 | 2 I<# handy> | |
962 | ||
963 | This option controls how whitespace in text content is handled. Recognised | |
964 | values for the option are: | |
965 | ||
966 | =over 4 | |
967 | =item "0" | |
968 | (default) whitespace is passed through unaltered (except of course for the | |
969 | normalisation of whitespace in attribute values which is mandated by the XML | |
970 | recommendation) | |
971 | ||
972 | =item "1" | |
973 | whitespace is normalised in any value used as a hash key (normalising means | |
974 | removing leading and trailing whitespace and collapsing sequences of whitespace | |
975 | characters to a single space) | |
976 | ||
977 | =item "2" | |
978 | whitespace is normalised in all text content | |
979 | ||
980 | =back | |
981 | ||
982 | Note: you can spell this option with a 'z' if that is more natural for you. | |
983 | ||
984 | =item Parser => OBJECT | |
985 | ||
986 | You may pass your own M<XML::LibXML> object, instead of having one | |
987 | created for you. This is useful when you need specific configuration | |
988 | on that object (See M<XML::LibXML::Parser>) or have implemented your | |
989 | own extension to that object. | |
990 | ||
991 | The internally created parser object is configured in safe mode. | |
992 | Read the M<XML::LibXML::Parser> manual about security issues with | |
993 | certain parameter settings. The default is unsafe! | |
994 | ||
995 | =item ParserOpts => HASH|ARRAY | |
996 | ||
997 | Pass parameters to the creation of a new internal parser object. You | |
998 | can overrule the options which will create a safe parser. It may be more | |
999 | readable to use the C<Parser> parameter. | |
1000 | ||
1001 | =item SearchPath => [ list ] I<# handy> | |
1002 | ||
1003 | If you pass C<XMLin()> a filename, but the filename includes no directory | |
1004 | component, you can use this option to specify which directories should be | |
1005 | searched to locate the file. You might use this option to search first in the | |
1006 | user's home directory, then in a global directory such as /etc. | |
1007 | ||
1008 | If a filename is provided to C<XMLin()> but SearchPath is not defined, the | |
1009 | file is assumed to be in the current directory. | |
1010 | ||
1011 | If the first parameter to C<XMLin()> is undefined, the default SearchPath | |
1012 | will contain only the directory in which the script itself is located. | |
1013 | Otherwise the default SearchPath will be empty. | |
1014 | ||
1015 | =item SuppressEmpty => 1 | '' | undef | |
1016 | ||
1017 | [0.99] What to do with empty elements (no attributes and no content). The | |
1018 | default behaviour is to represent them as empty hashes. Setting this | |
1019 | option to a true value (eg: 1) will cause empty elements to be skipped | |
1020 | altogether. Setting the option to 'undef' or the empty string will | |
1021 | cause empty elements to be represented as the undefined value or the | |
1022 | empty string respectively. | |
1023 | ||
1024 | =item ValueAttr => [ names ] I<# handy> | |
1025 | ||
1026 | Use this option to deal with elements which always have a single attribute and no | |
1027 | content. Eg: | |
1028 | ||
1029 | <opt> | |
1030 | <colour value="red" /> | |
1031 | <size value="XXL" /> | |
1032 | </opt> | |
1033 | ||
1034 | Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to: | |
1035 | ||
1036 | { | |
1037 | colour => 'red', | |
1038 | size => 'XXL' | |
1039 | } | |
1040 | ||
1041 | instead of this (the default): | |
1042 | ||
1043 | { | |
1044 | colour => { value => 'red' }, | |
1045 | size => { value => 'XXL' } | |
1046 | } | |
1047 | ||
1048 | =item NsExpand => 0 I<advised> | |
1049 | ||
1050 | When name-spaces are used, the default behavior is to include the | |
1051 | prefix in the key name. However, this is very dangerous: the prefixes | |
1052 | can be changed without a change of the XML message meaning. Therefore, | |
1053 | you can better use this C<NsExpand> option. The downside, however, is | |
1054 | that the labels get very long. | |
1055 | ||
1056 | Without this option: | |
1057 | ||
1058 | <record xmlns:x="http://xyz"> | |
1059 | <x:field1>42</x:field1> | |
1060 | </record> | |
1061 | <record xmlns:y="http://xyz"> | |
1062 | <y:field1>42</y:field1> | |
1063 | </record> | |
1064 | ||
1065 | translates into | |
1066 | ||
1067 | { 'x:field1' => 42 } | |
1068 | { 'y:field1' => 42 } | |
1069 | ||
1070 | but both source components have exactly the same meaning. When C<NsExpand> | |
1071 | is used, the result is: | |
1072 | ||
1073 | { '{http://xyz}field1' => 42 } | |
1074 | { '{http://xyz}field1' => 42 } | |
1075 | ||
1076 | Of course, addressing these fields is more work. It is advised to implement | |
1077 | it like this: | |
1078 | ||
1079 | my $ns = 'http://xyz'; | |
1080 | $data->{"{$ns}field1"}; | |
1081 | ||
1082 | =item NsStrip => 0 I<sloppy coding> | |
1083 | ||
1084 | [not available in XML::Simple] | |
1085 | Namespaces are really important to avoid name collisions, but they are | |
1086 | a bit of a hassle. To do it correctly, use option C<NsExpand>. To do | |
1087 | it sloppy, use C<NsStrip>. With this option set, the above example will | |
1088 | return | |
1089 | ||
1090 | { field1 => 42 } | |
1091 | { field1 => 42 } | |
1092 | ||
1093 | =back | |
1094 | ||
1095 | =chapter EXAMPLES | |
1096 | ||
1097 | When C<XMLin()> reads the following very simple piece of XML: | |
1098 | ||
1099 | <opt username="testuser" password="frodo"></opt> | |
1100 | ||
1101 | it returns the following data structure: | |
1102 | ||
1103 | { | |
1104 | username => 'testuser', | |
1105 | password => 'frodo' | |
1106 | } | |
1107 | ||
1108 | The identical result could have been produced with this alternative XML: | |
1109 | ||
1110 | <opt username="testuser" password="frodo" /> | |
1111 | ||
1112 | Or this (although see 'ForceArray' option for variations): | |
1113 | ||
1114 | <opt> | |
1115 | <username>testuser</username> | |
1116 | <password>frodo</password> | |
1117 | </opt> | |
1118 | ||
1119 | Repeated nested elements are represented as anonymous arrays: | |
1120 | ||
1121 | <opt> | |
1122 | <person firstname="Joe" lastname="Smith"> | |
1123 | <email>joe@smith.com</email> | |
1124 | <email>jsmith@yahoo.com</email> | |
1125 | </person> | |
1126 | <person firstname="Bob" lastname="Smith"> | |
1127 | <email>bob@smith.com</email> | |
1128 | </person> | |
1129 | </opt> | |
1130 | ||
1131 | { | |
1132 | person => [ | |
1133 | { email => [ 'joe@smith.com', 'jsmith@yahoo.com' ], | |
1134 | firstname => 'Joe', | |
1135 | lastname => 'Smith' | |
1136 | }, | |
1137 | { email => 'bob@smith.com', | |
1138 | firstname => 'Bob', | |
1139 | lastname => 'Smith' | |
1140 | } | |
1141 | ] | |
1142 | } | |
1143 | ||
1144 | Nested elements with a recognised key attribute are transformed (folded) from | |
1145 | an array into a hash keyed on the value of that attribute (see the C<KeyAttr> | |
1146 | option): | |
1147 | ||
1148 | <opt> | |
1149 | <person key="jsmith" firstname="Joe" lastname="Smith" /> | |
1150 | <person key="tsmith" firstname="Tom" lastname="Smith" /> | |
1151 | <person key="jbloggs" firstname="Joe" lastname="Bloggs" /> | |
1152 | </opt> | |
1153 | ||
1154 | { | |
1155 | person => { | |
1156 | jbloggs => { | |
1157 | firstname => 'Joe', | |
1158 | lastname => 'Bloggs' | |
1159 | }, | |
1160 | tsmith => { | |
1161 | firstname => 'Tom', | |
1162 | lastname => 'Smith' | |
1163 | }, | |
1164 | jsmith => { | |
1165 | firstname => 'Joe', | |
1166 | lastname => 'Smith' | |
1167 | } | |
1168 | } | |
1169 | } | |
1170 | ||
1171 | ||
1172 | The <anon> tag can be used to form anonymous arrays: | |
1173 | ||
1174 | <opt> | |
1175 | <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head> | |
1176 | <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data> | |
1177 | <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data> | |
1178 | <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data> | |
1179 | </opt> | |
1180 | ||
1181 | { | |
1182 | head => [ [ 'Col 1', 'Col 2', 'Col 3' ] ], | |
1183 | data => [ [ 'R1C1', 'R1C2', 'R1C3' ], | |
1184 | [ 'R2C1', 'R2C2', 'R2C3' ], | |
1185 | [ 'R3C1', 'R3C2', 'R3C3' ] | |
1186 | ] | |
1187 | } | |
1188 | ||
1189 | Anonymous arrays can be nested to arbitrary levels and as a special case, if | |
1190 | the surrounding tags for an XML document contain only an anonymous array the | |
1191 | arrayref will be returned directly rather than the usual hashref: | |
1192 | ||
1193 | <opt> | |
1194 | <anon><anon>Col 1</anon><anon>Col 2</anon></anon> | |
1195 | <anon><anon>R1C1</anon><anon>R1C2</anon></anon> | |
1196 | <anon><anon>R2C1</anon><anon>R2C2</anon></anon> | |
1197 | </opt> | |
1198 | ||
1199 | [ | |
1200 | [ 'Col 1', 'Col 2' ], | |
1201 | [ 'R1C1', 'R1C2' ], | |
1202 | [ 'R2C1', 'R2C2' ] | |
1203 | ] | |
1204 | ||
1205 | Elements which only contain text content will simply be represented as a | |
1206 | scalar. Where an element has both attributes and text content, the element | |
1207 | will be represented as a hashref with the text content in the 'content' key | |
1208 | (see the C<ContentKey> option): | |
1209 | ||
1210 | <opt> | |
1211 | <one>first</one> | |
1212 | <two attr="value">second</two> | |
1213 | </opt> | |
1214 | ||
1215 | { | |
1216 | one => 'first', | |
1217 | two => { attr => 'value', content => 'second' } | |
1218 | } | |
1219 | ||
1220 | Mixed content (elements which contain both text content and nested elements) | |
1221 | will not be represented in a useful way - element order and significant | |
1222 | whitespace will be lost. If you need to work with mixed content, then | |
1223 | XML::Simple is not the right tool for your job - check out the next section. | |
1224 | ||
1225 | =section Differences to XML::Simple | |
1226 | ||
1227 | In general, the output and the options are equivalent, although this | |
1228 | module has some differences with M<XML::Simple> to be aware of. | |
1229 | ||
1230 | =over 4 | |
1231 | =item only M<XMLin()> is supported | |
1232 | If you want to write XML then use a schema (for instance with | |
1233 | M<XML::Compile>). Do not attempt to create XML by hand! If you still | |
1234 | think you need it, then have a look at XMLout() as implemented by | |
1235 | M<XML::Simple> or any of a zillion template systems. | |
1236 | ||
1237 | =item no "variables" option | |
1238 | IMO, you should use a templating system if you want variables filled-in | |
1239 | in the input: it is not a task for this module. | |
1240 | ||
1241 | =item ForceArray options | |
1242 | There are a few small differences in the result of the C<forcearray> option, | |
1243 | because M<XML::Simple> seems to behave inconsistently. | |
1244 | ||
1245 | =item Elements with only blanks | |
1246 | When your element contains white space (new-lines etc), it is | |
1247 | not considered an empty list of sub-elements (XML::Simple returns an | |
1248 | empty HASH), but a simple text string containing those white space | |
1249 | characters. | |
1250 | ||
1251 | =item hooks | |
1252 | XML::Simple does not support hooks. | |
1253 | ||
1254 | =back | |
1255 | ||
1256 | =chapter SEE ALSO | |
1257 | ||
1258 | L<XML::Compile> for processing XML when a schema is available. When you | |
1259 | have a schema, the data and structure of your message get validated. | |
1260 | ||
1261 | L<XML::Simple>, the original implementation whose interface is followed | |
1262 | as closely as possible. | |
1263 | ||
1264 | =cut |
0 | =encoding utf8 | |
1 | ||
2 | =head1 NAME | |
3 | ||
4 | XML::LibXML::Simple - XML::LibXML clone of XML::Simple::XMLin() | |
5 | ||
6 | =head1 INHERITANCE | |
7 | ||
8 | XML::LibXML::Simple | |
9 | is an Exporter | |
10 | ||
11 | =head1 SYNOPSIS | |
12 | ||
13 | my $xml = ...; # filename, fh, string, or XML::LibXML-node | |
14 | ||
15 | Imperative: | |
16 | ||
17 | use XML::LibXML::Simple qw(XMLin); | |
18 | my $data = XMLin $xml, %options; | |
19 | ||
20 | Or the Object Oriented way: | |
21 | ||
22 | use XML::LibXML::Simple (); | |
23 | my $xs = XML::LibXML::Simple->new(%options); | |
24 | my $data = $xs->XMLin($xml, %options); | |
25 | ||
26 | =head1 DESCRIPTION | |
27 | ||
28 | This module is a blunt rewrite of XML::Simple (by Grant McLean) to | |
29 | use the XML::LibXML parser for XML structures, where the original | |
30 | uses plain Perl or SAX parsers. | |
31 | ||
32 | B<Be warned:> this module tries to be smart. You may very well shoot | |
33 | yourself in the foot with this DWIMmery. Read the whole manual page | |
34 | at least once before you start using it. If your XML is described in | |
35 | a schema or WSDL, then use XML::Compile for maintainable code. | |
36 | ||
37 | =head1 METHODS | |
38 | ||
39 | =head2 Constructors | |
40 | ||
41 | =over 4 | |
42 | ||
43 | =item XML::LibXML::Simple-E<gt>B<new>(%options) | |
44 | ||
45 | Instantiate an object, which can be used to call L<XMLin()|XML::LibXML::Simple/"Translators"> on. You can | |
46 | provide %options to this constructor (to be reused for each call to XMLin) | |
47 | and with each call of XMLin (to be used once). | |
48 | ||
49 | For descriptions of the %options see the L</DETAILS> | |
50 | section of this manual page. | |
51 | ||
52 | =back | |
53 | ||
54 | =head2 Translators | |
55 | ||
56 | =over 4 | |
57 | ||
58 | =item $obj-E<gt>B<XMLin>($xmldata, %options) | |
59 | ||
60 | For $xmldata and descriptions of the %options see the L</DETAILS> | |
61 | section of this manual page. | |
62 | ||
63 | =back | |
64 | ||
65 | =head1 FUNCTIONS | |
66 | ||
67 | The functions C<XMLin> (exported implicitly) and C<xml_in> | |
68 | (exported on request) simply call C<< XML::LibXML::Simple->new->XMLin() >> | |
69 | with the provided parameters. | |
70 | ||
71 | =head1 DETAILS | |
72 | ||
73 | =head2 Parameter $xmldata | |
74 | ||
75 | As first parameter to L<XMLin()|XML::LibXML::Simple/"Translators"> you must provide the XML message to be | |
76 | translated into a Perl structure. Choose one of the following: | |
77 | ||
78 | =over 4 | |
79 | ||
80 | =item A filename | |
81 | ||
82 | If the filename contains no directory components, C<XMLin()> will look for the | |
83 | file in each directory in the SearchPath (see OPTIONS below) and in the | |
84 | current directory. eg: | |
85 | ||
86 | $data = XMLin('/etc/params.xml', %options); | |
87 | ||
88 | =item A dash (-) | |
89 | ||
90 | Parse from STDIN. | |
91 | ||
92 | $data = XMLin('-', %options); | |
93 | ||
94 | =item undef | |
95 | ||
96 | [deprecated] | |
97 | If there is no XML specifier, C<XMLin()> will check the script directory and | |
98 | each of the SearchPath directories for a file with the same name as the script | |
99 | but with the extension '.xml'. Note: if you wish to specify options, you | |
100 | must specify the value 'undef'. eg: | |
101 | ||
102 | $data = XMLin(undef, ForceArray => 1); | |
103 | ||
104 | This feature is available for backwards compatibility with XML::Simple, | |
105 | but quite sensitive. You can easily hit the wrong xml file as input. | |
106 | Please do not use it: always use an explicit filename. | |
107 | ||
108 | =item A string of XML | |
109 | ||
110 | A string containing XML (recognised by the presence of '<' and '>' characters) | |
111 | will be parsed directly. eg: | |
112 | ||
113 | $data = XMLin('<opt username="bob" password="flurp" />', %options); | |
114 | ||
115 | =item An IO::Handle object | |
116 | ||
117 | In this case, XML::LibXML::Parser will read the XML data directly from | |
118 | the provided file. | |
119 | ||
120 | # $fh = IO::File->new('/etc/params.xml') or die; | |
121 | open my $fh, '<:encoding(utf8)', '/etc/params.xml' or die; | |
122 | ||
123 | $data = XMLin($fh, %options); | |
124 | ||
125 | =item An XML::LibXML::Document or ::Element | |
126 | ||
127 | [Not available in XML::Simple] When you have a pre-parsed XML::LibXML | |
128 | node, you can pass that. | |
129 | ||
130 | =back | |
131 | ||
132 | =head2 Parameter %options | |
133 | ||
134 | L<XML::LibXML::Simple|XML::LibXML::Simple> supports most options defined by XML::Simple, so | |
135 | the interface is quite compatible. Minor changes apply. This explanation | |
136 | is extracted from the XML::Simple manual-page. | |
137 | ||
138 | =over 4 | |
139 | ||
140 | =item * | |
141 | ||
142 | check out C<ForceArray> because you'll almost certainly want to turn it on | |
143 | ||
144 | =item * | |
145 | ||
146 | make sure you know what the C<KeyAttr> option does and what its default | |
147 | value is because it may surprise you otherwise. | |
148 | ||
149 | =item * | |
150 | ||
151 | Option names are case-insensitive so you can use the mixed case versions | |
152 | shown here; you can add underscores between the words (eg: key_attr) | |
153 | if you like. | |
154 | ||
155 | =back | |
156 | ||
157 | In alphabetic order: | |
158 | ||
159 | =over 4 | |
160 | ||
161 | =item ContentKey => 'keyname' I<# seldom used> | |
162 | ||
163 | When text content is parsed to a hash value, this option lets you specify a | |
164 | name for the hash key to override the default 'content'. So for example: | |
165 | ||
166 | XMLin('<opt one="1">Two</opt>', ContentKey => 'text') | |
167 | ||
168 | will parse to: | |
169 | ||
170 | { one => 1, text => 'Two' } | |
171 | ||
172 | instead of: | |
173 | ||
174 | { one => 1, content => 'Two' } | |
175 | ||
176 | You can also prefix your selected key name with a '-' character to have | |
177 | C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after | |
178 | array folding. For example: | |
179 | ||
180 | XMLin( | |
181 | '<opt><item name="one">First</item><item name="two">Second</item></opt>', | |
182 | KeyAttr => {item => 'name'}, | |
183 | ForceArray => [ 'item' ], | |
184 | ContentKey => '-content' | |
185 | ) | |
186 | ||
187 | will parse to: | |
188 | ||
189 | { | |
190 | item => { | |
191 | one => 'First', | |
192 | two => 'Second' | |
193 | } | |
194 | } | |
195 | ||
196 | rather than this (without the '-'): | |
197 | ||
198 | { | |
199 | item => { | |
200 | one => { content => 'First' }, | |
201 | two => { content => 'Second' } | |
202 | } | |
203 | } | |
204 | ||
205 | =item ForceArray => 1 I<# important> | |
206 | ||
207 | This option should be set to '1' to force nested elements to be represented | |
208 | as arrays even when there is only one. Eg, with ForceArray enabled, this | |
209 | XML: | |
210 | ||
211 | <opt> | |
212 | <name>value</name> | |
213 | </opt> | |
214 | ||
215 | would parse to this: | |
216 | ||
217 | { name => [ 'value' ] } | |
218 | ||
219 | instead of this (the default): | |
220 | ||
221 | { name => 'value' } | |
222 | ||
223 | This option is especially useful if the data structure is likely to be written | |
224 | back out as XML and the default behaviour of rolling single nested elements up | |
225 | into attributes is not desirable. | |
226 | ||
227 | If you are using the array folding feature, you should almost certainly | |
228 | enable this option. If you do not, single nested elements will not be | |
229 | parsed to arrays and therefore will not be candidates for folding to a | |
230 | hash. (Given that the default value of 'KeyAttr' enables array folding, | |
231 | the default value of this option should probably also have been enabled | |
232 | as well). | |
233 | ||
234 | =item ForceArray => [ names ] I<# important> | |
235 | ||
236 | This alternative (and preferred) form of the 'ForceArray' option allows you to | |
237 | specify a list of element names which should always be forced into an array | |
238 | representation, rather than the 'all or nothing' approach above. | |
239 | ||
240 | It is also possible to include compiled regular | |
241 | expressions in the list --any element names which match the pattern | |
242 | will be forced to arrays. If the list contains only a single regex, | |
243 | then it is not necessary to enclose it in an arrayref. Eg: | |
244 | ||
245 | ForceArray => qr/_list$/ | |
246 | ||
247 | =item ForceContent => 1 I<# seldom used> | |
248 | ||
249 | When C<XMLin()> parses elements which have text content as well as attributes, | |
250 | the text content must be represented as a hash value rather than a simple | |
251 | scalar. This option allows you to force text content to always parse to | |
252 | a hash value even when there are no attributes. So for example: | |
253 | ||
254 | XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1) | |
255 | ||
256 | will parse to: | |
257 | ||
258 | { | |
259 | x => { content => 'text1' }, | |
260 | y => { a => 2, content => 'text2' } | |
261 | } | |
262 | ||
263 | instead of: | |
264 | ||
265 | { | |
266 | x => 'text1', | |
267 | y => { 'a' => 2, 'content' => 'text2' } | |
268 | } | |
269 | ||
270 | =item GroupTags => { grouping tag => grouped tag } I<# handy> | |
271 | ||
272 | You can use this option to eliminate extra levels of indirection in your Perl | |
273 | data structure. For example this XML: | |
274 | ||
275 | <opt> | |
276 | <searchpath> | |
277 | <dir>/usr/bin</dir> | |
278 | <dir>/usr/local/bin</dir> | |
279 | <dir>/usr/X11/bin</dir> | |
280 | </searchpath> | |
281 | </opt> | |
282 | ||
283 | Would normally be read into a structure like this: | |
284 | ||
285 | { | |
286 | searchpath => { | |
287 | dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] | |
288 | } | |
289 | } | |
290 | ||
291 | But when read in with the appropriate value for 'GroupTags': | |
292 | ||
293 | my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' }); | |
294 | ||
295 | It will return this simpler structure: | |
296 | ||
297 | { | |
298 | searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] | |
299 | } | |
300 | ||
301 | The grouping element (C<< <searchpath> >> in the example) must not contain any | |
302 | attributes or elements other than the grouped element. | |
303 | ||
304 | You can specify multiple 'grouping element' to 'grouped element' mappings in | |
305 | the same hashref. If this option is combined with C<KeyAttr>, the array | |
306 | folding will occur first and then the grouped element names will be eliminated. | |
307 | ||
308 | =item HookNodes => CODE | |
309 | ||
310 | Select document nodes to apply special tricks. | |
311 | Introduced in [0.96], not available in XML::Simple. | |
312 | ||
313 | When this option is provided, the CODE will be called once the XML DOM | |
314 | tree is ready to get transformed into Perl. Your CODE should return | |
315 | either C<undef> (nothing to do) or a HASH which maps values of | |
316 | unique_key (see XML::LibXML::Node method C<unique_key>) onto CODE | |
317 | references to be called. | |
318 | ||
319 | Once the translator from XML into Perl reaches a selected node, it will | |
320 | call your routine specific for that node. That triggering node found | |
321 | is the only parameter. When you return C<undef>, the node will not be | |
322 | found in the final result. You may return any data (even the node itself) | |
323 | which will be included in the final result as is, under the name of the | |
324 | original node. | |
325 | ||
326 | Example: | |
327 | ||
328 | my $out = XMLin $file, HookNodes => \&protect_html; | |
329 | ||
330 | sub protect_html($$) | |
331 | { # $obj is the instantiated XML::LibXML::Simple object | |
332 | # $xml is a XML::LibXML::Element to get transformed | |
333 | my ($obj, $xml) = @_; | |
334 | ||
335 | my %hook; # collects the table of hooks | |
336 | ||
337 | # do an xpath search for HTML | |
338 | my $xpc = XML::LibXML::XPathContext->new($xml); | |
339 | my @nodes = $xpc->findnodes(...); #XXX | |
340 | @nodes or return undef; | |
341 | ||
342 | my $as_text = sub { $_[0]->toString(0) }; # as text | |
343 | # $as_node = sub { $_[0] }; # as node | |
344 | # $skip = sub { undef }; # not at all | |
345 | ||
346 | # the same behavior for all xpath nodes, in this example | |
347 | $hook{$_->unique_key} = $as_text | |
348 | for @nodes; | |
349 | ||
350 | \%hook; | |
351 | } | |
352 | ||
353 | =item KeepRoot => 1 I<# handy> | |
354 | ||
355 | In its attempt to return a data structure free of superfluous detail and | |
356 | unnecessary levels of indirection, C<XMLin()> normally discards the root | |
357 | element name. Setting the 'KeepRoot' option to '1' will cause the root element | |
358 | name to be retained. So after executing this code: | |
359 | ||
360 | $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1) | |
361 | ||
362 | You'll be able to reference the tempdir as | |
363 | C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default | |
364 | C<$config-E<gt>{tempdir}>. | |
365 | ||
366 | =item KeyAttr => [ list ] I<# important> | |
367 | ||
368 | This option controls the 'array folding' feature which translates nested | |
369 | elements from an array to a hash. It also controls the 'unfolding' of hashes | |
370 | to arrays. | |
371 | ||
372 | For example, this XML: | |
373 | ||
374 | <opt> | |
375 | <user login="grep" fullname="Gary R Epstein" /> | |
376 | <user login="stty" fullname="Simon T Tyson" /> | |
377 | </opt> | |
378 | ||
379 | would, by default, parse to this: | |
380 | ||
381 | { | |
382 | user => [ | |
383 | { login => 'grep', | |
384 | fullname => 'Gary R Epstein' | |
385 | }, | |
386 | { login => 'stty', | |
387 | fullname => 'Simon T Tyson' | |
388 | } | |
389 | ] | |
390 | } | |
391 | ||
392 | If the option 'KeyAttr => "login"' were used to specify that the 'login' | |
393 | attribute is a key, the same XML would parse to: | |
394 | ||
395 | { | |
396 | user => { | |
397 | stty => { fullname => 'Simon T Tyson' }, | |
398 | grep => { fullname => 'Gary R Epstein' } | |
399 | } | |
400 | } | |
401 | ||
402 | The key attribute names should be supplied in an arrayref if there is more | |
403 | than one. C<XMLin()> will attempt to match attribute names in the order | |
404 | supplied. | |
405 | ||
406 | Note 1: The default value for 'KeyAttr' is C<< ['name', 'key', 'id'] >>. | |
407 | If you do not want folding on input or unfolding on output you must | |
408 | set this option to an empty list to disable the feature. | |
409 | ||
410 | Note 2: If you wish to use this option, you should also enable the | |
411 | C<ForceArray> option. Without 'ForceArray', a single nested element will be | |
412 | rolled up into a scalar rather than an array and therefore will not be folded | |
413 | (since only arrays get folded). | |
414 | ||
415 | =item KeyAttr => { list } I<# important> | |
416 | ||
417 | This alternative (and preferred) method of specifying the key attributes | |
418 | allows more fine grained control over which elements are folded and on which | |
419 | attributes. For example the option 'KeyAttr => { package => "id" }' will cause | |
420 | any package elements to be folded on the 'id' attribute. No other elements | |
421 | which have an 'id' attribute will be folded at all. | |
422 | ||
423 | Two further variations are made possible by prefixing a '+' or a '-' character | |
424 | to the attribute name: | |
425 | ||
426 | The option 'KeyAttr => { user => "+login" }' will cause this XML: | |
427 | ||
428 | <opt> | |
429 | <user login="grep" fullname="Gary R Epstein" /> | |
430 | <user login="stty" fullname="Simon T Tyson" /> | |
431 | </opt> | |
432 | ||
433 | to parse to this data structure: | |
434 | ||
435 | { | |
436 | user => { | |
437 | stty => { | |
438 | fullname => 'Simon T Tyson', | |
439 | login => 'stty' | |
440 | }, | |
441 | grep => { | |
442 | fullname => 'Gary R Epstein', | |
443 | login => 'grep' | |
444 | } | |
445 | } | |
446 | } | |
447 | ||
448 | The '+' indicates that the value of the key attribute should be copied | |
449 | rather than moved to the folded hash key. | |
450 | ||
451 | A '-' prefix would produce this result: | |
452 | ||
453 | { | |
454 | user => { | |
455 | stty => { | |
456 | fullname => 'Simon T Tyson', | |
457 | -login => 'stty' | |
458 | }, | |
459 | grep => { | |
460 | fullname => 'Gary R Epstein', | |
461 | -login => 'grep' | |
462 | } | |
463 | } | |
464 | } | |
465 | ||
466 | =item NoAttr => 1 I<# handy> | |
467 | ||
468 | When used with C<XMLin()>, any attributes in the XML will be ignored. | |
469 | ||
470 | =item NormaliseSpace => 0 | 1 | 2 I<# handy> | |
471 | ||
472 | This option controls how whitespace in text content is handled. Recognised | |
473 | values for the option are: | |
474 | ||
475 | =over 4 | |
476 | ||
477 | =item "0" | |
478 | ||
479 | (default) whitespace is passed through unaltered (except of course for the | |
480 | normalisation of whitespace in attribute values which is mandated by the XML | |
481 | recommendation) | |
482 | ||
483 | =item "1" | |
484 | ||
485 | whitespace is normalised in any value used as a hash key (normalising means | |
486 | removing leading and trailing whitespace and collapsing sequences of whitespace | |
487 | characters to a single space) | |
488 | ||
489 | =item "2" | |
490 | ||
491 | whitespace is normalised in all text content | |
492 | ||
493 | =back | |
494 | ||
495 | Note: you can spell this option with a 'z' if that is more natural for you. | |
496 | ||
497 | =item Parser => OBJECT | |
498 | ||
499 | You may pass your own XML::LibXML object, instead of having one | |
500 | created for you. This is useful when you need specific configuration | |
501 | on that object (See XML::LibXML::Parser) or have implemented your | |
502 | own extension to that object. | |
503 | ||
504 | The internally created parser object is configured in safe mode. | |
505 | Read the XML::LibXML::Parser manual about security issues with | |
506 | certain parameter settings. The XML::LibXML::Parser default is unsafe! | |
507 | ||
508 | =item ParserOpts => HASH|ARRAY | |
509 | ||
510 | Pass parameters to the creation of a new internal parser object. You | |
511 | can overrule the options which will create a safe parser. It may be more | |
512 | readable to use the C<Parser> parameter. | |
513 | ||
514 | =item SearchPath => [ list ] I<# handy> | |
515 | ||
516 | If you pass C<XMLin()> a filename, but the filename includes no directory | |
517 | component, you can use this option to specify which directories should be | |
518 | searched to locate the file. You might use this option to search first in the | |
519 | user's home directory, then in a global directory such as /etc. | |
520 | ||
521 | If a filename is provided to C<XMLin()> but SearchPath is not defined, the | |
522 | file is assumed to be in the current directory. | |
523 | ||
524 | If the first parameter to C<XMLin()> is undefined, the default SearchPath | |
525 | will contain only the directory in which the script itself is located. | |
526 | Otherwise the default SearchPath will be empty. | |
527 | ||
528 | =item SuppressEmpty => 1 | '' | undef | |
529 | ||
530 | [0.99] What to do with empty elements (no attributes and no content). The | |
531 | default behaviour is to represent them as empty hashes. Setting this | |
532 | option to a true value (eg: 1) will cause empty elements to be skipped | |
533 | altogether. Setting the option to 'undef' or the empty string will | |
534 | cause empty elements to be represented as the undefined value or the | |
535 | empty string respectively. | |
536 | ||
537 | =item ValueAttr => [ names ] I<# handy> | |
538 | ||
539 | Use this option to deal with elements which always have a single attribute and no | |
540 | content. Eg: | |
541 | ||
542 | <opt> | |
543 | <colour value="red" /> | |
544 | <size value="XXL" /> | |
545 | </opt> | |
546 | ||
547 | Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to: | |
548 | ||
549 | { | |
550 | colour => 'red', | |
551 | size => 'XXL' | |
552 | } | |
553 | ||
554 | instead of this (the default): | |
555 | ||
556 | { | |
557 | colour => { value => 'red' }, | |
558 | size => { value => 'XXL' } | |
559 | } | |
560 | ||
561 | =item NsExpand => 0 I<advised> | |
562 | ||
563 | When name-spaces are used, the default behavior is to include the | |
564 | prefix in the key name. However, this is very dangerous: the prefixes | |
565 | can be changed without a change of the XML message meaning. Therefore, | |
566 | you can better use this C<NsExpand> option. The downside, however, is | |
567 | that the labels get very long. | |
568 | ||
569 | Without this option: | |
570 | ||
571 | <record xmlns:x="http://xyz"> | |
572 | <x:field1>42</x:field1> | |
573 | </record> | |
574 | <record xmlns:y="http://xyz"> | |
575 | <y:field1>42</y:field1> | |
576 | </record> | |
577 | ||
578 | translates into | |
579 | ||
580 | { 'x:field1' => 42 } | |
581 | { 'y:field1' => 42 } | |
582 | ||
583 | but both source components have exactly the same meaning. When C<NsExpand> | |
584 | is used, the result is: | |
585 | ||
586 | { '{http://xyz}field1' => 42 } | |
587 | { '{http://xyz}field1' => 42 } | |
588 | ||
589 | Of course, addressing these fields is more work. It is advised to implement | |
590 | it like this: | |
591 | ||
592 | my $ns = 'http://xyz'; | |
593 | $data->{"{$ns}field1"}; | |
594 | ||
595 | =item NsStrip => 0 I<sloppy coding> | |
596 | ||
597 | [not available in XML::Simple] | |
598 | Namespaces are really important to avoid name collisions, but they are | |
599 | a bit of a hassle. To do it correctly, use option C<NsExpand>. To do | |
600 | it sloppily, use C<NsStrip>. With this option set, the above example will | |
601 | return | |
602 | ||
603 | { field1 => 42 } | |
604 | { field1 => 42 } | |
605 | ||
606 | =back | |
607 | ||
608 | =head1 EXAMPLES | |
609 | ||
610 | When C<XMLin()> reads the following very simple piece of XML: | |
611 | ||
612 | <opt username="testuser" password="frodo"></opt> | |
613 | ||
614 | it returns the following data structure: | |
615 | ||
616 | { | |
617 | username => 'testuser', | |
618 | password => 'frodo' | |
619 | } | |
620 | ||
621 | The identical result could have been produced with this alternative XML: | |
622 | ||
623 | <opt username="testuser" password="frodo" /> | |
624 | ||
625 | Or this (although see 'ForceArray' option for variations): | |
626 | ||
627 | <opt> | |
628 | <username>testuser</username> | |
629 | <password>frodo</password> | |
630 | </opt> | |
631 | ||
632 | Repeated nested elements are represented as anonymous arrays: | |
633 | ||
634 | <opt> | |
635 | <person firstname="Joe" lastname="Smith"> | |
636 | <email>joe@smith.com</email> | |
637 | <email>jsmith@yahoo.com</email> | |
638 | </person> | |
639 | <person firstname="Bob" lastname="Smith"> | |
640 | <email>bob@smith.com</email> | |
641 | </person> | |
642 | </opt> | |
643 | ||
644 | { | |
645 | person => [ | |
646 | { email => [ 'joe@smith.com', 'jsmith@yahoo.com' ], | |
647 | firstname => 'Joe', | |
648 | lastname => 'Smith' | |
649 | }, | |
650 | { email => 'bob@smith.com', | |
651 | firstname => 'Bob', | |
652 | lastname => 'Smith' | |
653 | } | |
654 | ] | |
655 | } | |
656 | ||
657 | Nested elements with a recognised key attribute are transformed (folded) from | |
658 | an array into a hash keyed on the value of that attribute (see the C<KeyAttr> | |
659 | option): | |
660 | ||
661 | <opt> | |
662 | <person key="jsmith" firstname="Joe" lastname="Smith" /> | |
663 | <person key="tsmith" firstname="Tom" lastname="Smith" /> | |
664 | <person key="jbloggs" firstname="Joe" lastname="Bloggs" /> | |
665 | </opt> | |
666 | ||
667 | { | |
668 | person => { | |
669 | jbloggs => { | |
670 | firstname => 'Joe', | |
671 | lastname => 'Bloggs' | |
672 | }, | |
673 | tsmith => { | |
674 | firstname => 'Tom', | |
675 | lastname => 'Smith' | |
676 | }, | |
677 | jsmith => { | |
678 | firstname => 'Joe', | |
679 | lastname => 'Smith' | |
680 | } | |
681 | } | |
682 | } | |
683 | ||
684 | The <anon> tag can be used to form anonymous arrays: | |
685 | ||
686 | <opt> | |
687 | <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head> | |
688 | <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data> | |
689 | <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data> | |
690 | <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data> | |
691 | </opt> | |
692 | ||
693 | { | |
694 | head => [ [ 'Col 1', 'Col 2', 'Col 3' ] ], | |
695 | data => [ [ 'R1C1', 'R1C2', 'R1C3' ], | |
696 | [ 'R2C1', 'R2C2', 'R2C3' ], | |
697 | [ 'R3C1', 'R3C2', 'R3C3' ] | |
698 | ] | |
699 | } | |
700 | ||
701 | Anonymous arrays can be nested to arbitrary levels and as a special case, if | |
702 | the surrounding tags for an XML document contain only an anonymous array the | |
703 | arrayref will be returned directly rather than the usual hashref: | |
704 | ||
705 | <opt> | |
706 | <anon><anon>Col 1</anon><anon>Col 2</anon></anon> | |
707 | <anon><anon>R1C1</anon><anon>R1C2</anon></anon> | |
708 | <anon><anon>R2C1</anon><anon>R2C2</anon></anon> | |
709 | </opt> | |
710 | ||
711 | [ | |
712 | [ 'Col 1', 'Col 2' ], | |
713 | [ 'R1C1', 'R1C2' ], | |
714 | [ 'R2C1', 'R2C2' ] | |
715 | ] | |
716 | ||
717 | Elements which only contain text content will simply be represented as a | |
718 | scalar. Where an element has both attributes and text content, the element | |
719 | will be represented as a hashref with the text content in the 'content' key | |
720 | (see the C<ContentKey> option): | |
721 | ||
722 | <opt> | |
723 | <one>first</one> | |
724 | <two attr="value">second</two> | |
725 | </opt> | |
726 | ||
727 | { | |
728 | one => 'first', | |
729 | two => { attr => 'value', content => 'second' } | |
730 | } | |
731 | ||
732 | Mixed content (elements which contain both text content and nested elements) | |
733 | will not be represented in a useful way - element order and significant | |
734 | whitespace will be lost. If you need to work with mixed content, then | |
735 | XML::Simple is not the right tool for your job - check out the next section. | |
736 | ||
737 | =head2 Differences to XML::Simple | |
738 | ||
739 | In general, the output and the options are equivalent, although this | |
740 | module has some differences with XML::Simple to be aware of. | |
741 | ||
742 | =over 4 | |
743 | ||
744 | =item only L<XMLin()|XML::LibXML::Simple/"Translators"> is supported | |
745 | ||
746 | If you want to write XML then use a schema (for instance with | |
747 | XML::Compile). Do not attempt to create XML by hand! If you still | |
748 | think you need it, then have a look at XMLout() as implemented by | |
749 | XML::Simple or any of a zillion template systems. | |
750 | ||
751 | =item no "variables" option | |
752 | ||
753 | IMO, you should use a templating system if you want variables filled-in | |
754 | in the input: it is not a task for this module. | |
755 | ||
756 | =item ForceArray options | |
757 | ||
758 | There are a few small differences in the result of the C<forcearray> option, | |
759 | because XML::Simple seems to behave inconsistently. | |
760 | ||
761 | =item hooks | |
762 | ||
763 | XML::Simple does not support hooks. | |
764 | ||
765 | =back | |
766 | ||
767 | =head1 SEE ALSO | |
768 | ||
769 | L<XML::Compile> for processing XML when a schema is available. When you | |
770 | have a schema, the data and structure of your message get validated. | |
771 | ||
772 | L<XML::Simple>, the original implementation whose interface is followed | |
773 | as closely as possible. | |
774 | ||
775 | =head1 COPYRIGHTS | |
776 | ||
777 | The interface design and large parts of the documentation were taken | |
778 | from the L<XML::Simple> module, written by | |
779 | Grant McLean E<lt>grantm@cpan.orgE<gt> | |
780 | ||
781 | Copyrights of the perl code and the related documentation by | |
782 | 2008-2020 by [Mark Overmeer <markov@cpan.org>]. For other contributors see ChangeLog. | |
783 | ||
784 | This program is free software; you can redistribute it and/or modify it | |
785 | under the same terms as Perl itself. | |
786 | See F<http://dev.perl.org/licenses/> | |
787 |