Codebase list libhtml-defang-perl / 2846df9
New upstream version 1.07 Jonas Smedegaard 5 years ago
13 changed file(s) with 1080 addition(s) and 716 deletion(s). Raw diff Collapse all Expand all
00 Revision history for Perl extension HTML::Defang.
1
2 1.07 Tue Jul 3 15:30:00 2018
3 - More fixes for 5.26 (\{ in regexp)
4 - Fixes for perl 5.8 (thanks Michael McClimon)
5
6 1.06 Mon Jun 26 14:30:00 2018
7 - Fix for 5.26 (\{ in regexp)
8 - Require at least perl 5.10
9
10 1.05 Tue Jun 19 15:30:00 2018
11 - Many bug fixes, improvements, speed ups and better nested HTML handling
112
213 1.04 Mon Jan 03 12:00:00 2011
314 - Change defang_and_add_to_output to delay the defanging until after the current tag is actually emmitted to the output
99 t/05_callbacks.t
1010 t/06_unicode.t
1111 META.yml Module meta-data (added by MakeMaker)
12 META.json Module JSON meta-data (added by MakeMaker)
0 {
1 "abstract" : "Cleans HTML as well as CSS of scripting and other executable contents, and neutralises XSS attacks.",
2 "author" : [
3 "Rob Mueller <cpan@robm.fastmail.fm>"
4 ],
5 "dynamic_config" : 1,
6 "generated_by" : "ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.142690",
7 "license" : [
8 "unknown"
9 ],
10 "meta-spec" : {
11 "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
12 "version" : "2"
13 },
14 "name" : "HTML-Defang",
15 "no_index" : {
16 "directory" : [
17 "t",
18 "inc"
19 ]
20 },
21 "prereqs" : {
22 "build" : {
23 "requires" : {
24 "ExtUtils::MakeMaker" : "0"
25 }
26 },
27 "configure" : {
28 "requires" : {
29 "ExtUtils::MakeMaker" : "0"
30 }
31 },
32 "runtime" : {
33 "requires" : {}
34 }
35 },
36 "release_status" : "stable",
37 "version" : 1.07
38 }
0 --- #YAML:1.0
1 name: HTML-Defang
2 version: 1.04
3 abstract: Cleans HTML as well as CSS of scripting and other executable contents, and neutralises XSS attacks.
4 license: ~
5 author:
6 - Kurian Jose Aerthail <cpan@kurianja.fastmail.fm>
7 generated_by: ExtUtils::MakeMaker version 6.42
8 distribution_type: module
9 requires:
0 ---
1 abstract: 'Cleans HTML as well as CSS of scripting and other executable contents, and neutralises XSS attacks.'
2 author:
3 - 'Rob Mueller <cpan@robm.fastmail.fm>'
4 build_requires:
5 ExtUtils::MakeMaker: '0'
6 configure_requires:
7 ExtUtils::MakeMaker: '0'
8 dynamic_config: 1
9 generated_by: 'ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.142690'
10 license: unknown
1011 meta-spec:
11 url: http://module-build.sourceforge.net/META-spec-v1.3.html
12 version: 1.3
12 url: http://module-build.sourceforge.net/META-spec-v1.4.html
13 version: '1.4'
14 name: HTML-Defang
15 no_index:
16 directory:
17 - t
18 - inc
19 requires: {}
20 version: 1.07
77 PREREQ_PM => {}, # e.g., Module::Name => 1.1
88 ($] >= 5.005 ? ## Add these new keywords supported since 5.005
99 (ABSTRACT_FROM => 'lib/HTML/Defang.pm', # retrieve abstract from module
10 AUTHOR => 'Kurian Jose Aerthail <cpan@kurianja.fastmail.fm>') : ()),
10 AUTHOR => 'Rob Mueller <cpan@robm.fastmail.fm>') : ()),
1111 );
0 HTML-Defang version 1.04
0 HTML-Defang version 1.07
11 ========================
22
33 This module accepts an input HTML and/or CSS string and removes any
2020
2121 COPYRIGHT AND LICENCE
2222
23 Copyright (C) 2003-2010 by Opera Software Australia Pty Ltd
23 Copyright (C) 2003-2018 by FastMail Pty Ltd
2424
2525 This library is free software; you can redistribute it and/or modify
2626 it under the same terms as Perl itself.
0 #!/usr/bin/perl -w
1
20 package HTML::Defang;
31
42 =head1 NAME
1715 url_callback => \&DefangUrlCallback,
1816 css_callback => \&DefangCssCallback,
1917 attribs_to_callback => [ qw(border src) ],
20 attribs_callback => \&DefangAttribsCallback
18 attribs_callback => \&DefangAttribsCallback,
19 content_callback => \&ContentCallback,
2120 );
2221
2322 my $SanitizedHtml = $Defang->defang($InputHtml);
8180 return DEFANG_NONE;
8281 }
8382
83 # Callback for all content between tags (except <style>, <script>, etc)
84 sub DefangContentCallback {
85 my ($Self, $Defang, $ContentR) = @_;
86
87 $$ContentR =~ s/remove this content//;
88 }
89
8490 =head1 DESCRIPTION
8591
8692 This module accepts an input HTML and/or CSS string and removes any executable code including scripting, embedded objects, applets, etc., and neutralises any XSS attacks. A whitelist based approach is used which means only HTML known to be safe is allowed through.
120126 %EXPORT_TAGS = (all => [qw(@FormTags DEFANG_NONE DEFANG_ALWAYS DEFANG_DEFAULT)]);
121127 Exporter::export_ok_tags('all');
122128
129 use 5.008;
123130 use strict;
124131 use warnings;
125132
126 our $VERSION=1.04;
133 our $VERSION=1.07;
127134
128135 use constant DEFANG_NONE => 0;
129136 use constant DEFANG_ALWAYS => 1;
137144 our @FormTags = qw(form input textarea select option button fieldset label legend multicol nextid optgroup);
138145
139146 # Some regexps for matching HTML tags + key=value attributes
140 my $AttrKeyStartLineRE = qr/(?:[^=<>\s\/\\]{1,}|[\/]\s*(?==))/;
147 my $AttrKeyStartLineRE = qr/(?:[^=<>\s\/\\]{1,}|[\/](?!\s*>))/;
141148 my $AttrKeyRE = qr/(?<=[\s'"\/])$AttrKeyStartLineRE/;
142 my $AttrValRE = qr/[^>\s'"`][^>\s]*|'[^']{0,16384}?'|"[^"]{0,16384}?"|`[^`]{0,16384}?`/;
149 my $AttrValRE = qr/[^>\s'"`][^>\s]*|'[^']*?'|"[^"]*?"|`[^`]*?`/;
143150 my $AttributesRE = qr/(?:(?:$AttrKeyRE\s*)?(?:=\s*$AttrValRE\s*)?)*/;
144151 my $TagNameRE = qr/[A-Za-z][A-Za-z0-9\#\&\;\:\!_-]*/;
145152
146 my $StyleSelectors = qr/[^{}\s][^{}]*?/;
153 my $StyleSelectors = qr/[^{}\s][^{}]{0,1024}?/;
147154 my $StyleName = qr/[^:}\s][^:{}]*?/;
148155 my $StyleValue = qr/[^;}\s][^;}]*|.*$/;
149156 my $StyleRule = qr/$StyleName\s*:\s*$StyleValue\s*/;
150 my $StyleRules = qr/\s*(?:$StyleRule)?(?:;\s*$StyleRule)*(?:;\s*)?/;
151
152 my $Fonts = qr/"?([A-Za-z0-9\s-]+)"?/;
157 my $StyleRules = qr/\s*(?:$StyleRule)?(?:;\s*$StyleRule)*(?:;\s*)*/;
158 my $StyleMediaSelector = qr/\@media\b[^{]*/;
159 my $RECStyleMediaSelector = qr{\G(\s*)($StyleMediaSelector)(\{)(\s*)}so;
160 my $RECStyleNaked = qr/\G(\s*)()()()($StyleRules)()(\s*)/o;
161 my $RECStyleSelected = qr/\G(\s*)((?:$StyleSelectors)?)(\s*)(\{)($StyleRules)(\})(\s*)/o;
162
163 my $Fonts = qr/["']?([A-Za-z0-9\s-]+)["']?/;
153164 my $Alignments = qr/(absbottom|absmiddle|all|autocentre|baseline|bottom|center|justify|left|middle|none|right|texttop|top)/;
154165
155166 my $Executables = '([^@]\.com|'.
156167 '.*\.(exe|cmd|bat|pif|scr|sys|sct|lnk|dll'.
157168 '|vbs?|vbe|hta|shb|shs|hlp|chm|eml|wsf|wsh|js'.
158169 '|asx|wm.|mdb|mht|msi|msp|cpl|lib|reg))';
159 my $SrcBanStd = qr/^([A-Za-z]*script|.*\&{|mocha|about|opera|mailto:|hcp:|\/(dev|proc)|\\|file|smb|cid:${Executables}(@|\?|$))/i;
170 my $SrcBanStd = qr/^([A-Za-z]*script|.*\&\{|mocha|about|opera|mailto:|hcp:|\/(dev|proc)|\\|file|smb|cid:${Executables}(@|\?|$))/i;
160171
161172 my %Rules =
162173 (
171182 "coords" => qr/^(\d+,)+\d+$/i,
172183 "datetime" => qr/^\d\d\d\d-\d\d-\d\d.{0,5}\d\d:\d\d:\d\d.{0,5}$/,
173184 "dir" => qr/^(ltr|rtl)$/i,
185 "empty" => qr/^$/i,
174186 "eudora" => qr/^(autourl)$/i,
175187 "font-face" => qr/^((${Fonts})[,\s]*)+$/i,
176188 "form-enctype" => qr/^(application\/x-www-form-urlencoded|multipart\/form-data)$/i,
177189 "form-method" => qr/^(get|post)$/i,
178190 "frame" => qr/^(void|above|below|hsides|vsides|lhs|rhs|box|border)$/i,
179191 # href: Not javascript, vbs or vbscript
180 "href" => [ qr/(?i:^([a-z]*script\s*:|.*\&{|mocha|hcp|opera\s*:|about\s*:|smb|\/dev\/|<))|[^\x00-\x7f]/ ],
192 "href" => [ qr/^((?:[a-z]*script|mocha|opera|about|data|tcl)\s*:|.*\&\{|hcp|smb|\/dev\/|<)/i ],
181193 "usemap-href" => qr/^#[A-Za-z0-9_.-]+$/, # this is not really a href at all!
182194 "input-size" => qr/^(\d{1,4})$/, # some browsers freak out with very large widgets
183195 "input-type" => qr/^(button|checkbox|file|hidden|image|password|radio|readonly|reset|submit|text)$/i,
203215 # "style" => qr/expression|eval|script:|mocha:|\&{|\@import|(?<!background-)position:|background-image/i, # XXX there are probably a million more ways to cause trouble with css!
204216 "style" => qr/^.*$/s,
205217 #kc In addition to this, we could strip all 'javascript:|expression|' etc. from all attributes(in attribute_cleanup())
206 "stylesheet" => [ qr/expression|eval|script:|mocha:|\&{|\@import/i ], # stylesheets are forbidden if Embedded => 1. css positioning can be allowed in an iframe.
218 "stylesheet" => [ qr/expression|eval|script:|mocha:|\&\{|\@import/i ], # stylesheets are forbidden if Embedded => 1. css positioning can be allowed in an iframe.
207219 # NB see also `process_stylesheet' below
208220 "style-type" => [ qr/script|mocha/i ],
209 "size" => qr/^[\d.]+(px|%)?$/i,
221 "size" => qr/^[\+\-]?[\d.]+(px|%)?$/i,
210222 "target" => qr/^[A-Za-z0-9_][A-Za-z0-9_.-]*$/,
211223 "base-href" => qr/^https?:\/\/[\w.\/]+$/,
212224 "anything" => qr/^.*$/, #[ 0, 0 ],
243255 "scroll" => "boolean",
244256 "scrolling" => "boolean",
245257 "topmargin" => "size",
258 "type" => "mime-type",
246259 "valign" => "align",
247260 "width" => "size",
261 "/" => "empty",
248262 );
249263
250264 my %ListAttributes =
292306 );
293307
294308 my %Tags = (
295 script => \&defang_script,
296 style => \&defang_style,
309 script => \&defang_script_tag,
310 style => \&defang_style_tag,
297311 "html" => 100,
298312 #
299313 # Safe elements commonly found in the <head> block follow.
360374 "shape" => "shape",
361375 "target" => "target",
362376 },
377 "article" => 1,
363378 "applet" => 0,
364379 "basefont" =>
365380 {
416431 "size" => "number",
417432 "ptsize" => "number",
418433 },
434 "footer" => 1,
419435 "form" => # FORM
420436 {
421437 "method" => "form-method",
424440 "accept" => "anything",
425441 "accept-charset" => "anything",
426442 },
443 "header" => 1,
427444 "hr" =>
428445 {
429446 "size" => "number",
503520 "nobr" => 0,
504521 "noembed" => 1,
505522 "nolayer" => 1,
506 "noscript" => 1,
523 # Pretend our defang result is going into a non-scripting environment,
524 # even though javascript is likely enabled, so just defang all noscript tags
525 "noscript" => 0,
507526 "noembed" => 1,
508527 "object" => 0,
509528 "ol" => \%ListAttributes,
526545 "pre" => 1,
527546 "rt" => 0,
528547 "ruby" => 0,
548 "section" => 1,
529549 "select" => # FORM
530550 {
531551 "disabled" => "anything",
631651 );
632652
633653 # Some entity conversions for attributes
654 my $CtrlChars = qr/[\x00-\x08\x0b-\x1f]/;
634655 my %EntityToChar = (quot => '"', apos => "'", amp => '&', 'lt' => '<', 'gt' => '>');
635 my %CharToEntity = reverse %EntityToChar;
636 my %QuoteRe = ('"' => qr/(["&<>])/, "'" => qr/(['&<>])/, "" => qr/(["&<>])/);
637
638 # Default list of mismatched tags to track
639 my %MismatchedTags = map { $_ => 1 } qw(table tbody thead tr td th font div span pre center blockquote dl ul ol h1 h2 h3 h4 h5 h6 fieldset tt p noscript a);
656 my %CharToEntity = ((reverse %EntityToChar), ' ' => '#x20', '/' => '#x2f', "\x09" => '#x09', "\x0a" => '#x0a');
657 my %QuoteRe = ('"' => qr/(["&<>\x09\x0a])/, "'" => qr/(['&<>\x09\x0a])/, "" => qr/(['"&<> \/\x09\x0a])/);
640658
641659 # When fixing mismatched tags, sometimes a close tag
642660 # shouldn't close all the way out
673691 # Convert to hash of hashes
674692 $_ = { default => $_->[0], map { $_ => 1 } @$_ } for values %ImplicitOpenTags;
675693
676 my %BlockTags = map { $_ => 1 } qw(h1 h2 h3 h4 h5 h6 p div pre plaintext address blockquote center form table tbody thead tfoot tr td caption colgroup col);
677 my %InlineTags = map { $_ => 1 } qw(span abbr acronym q sub sup cite code em kbd samp strong var dfn strike b i u s tt small big nobr a);
694 my %TableTags = map { $_ => 1 } qw(table tbody thead tfoot tr td th caption colgroup col);
695 my %BlockTags = map { $_ => 1 } qw(h1 h2 h3 h4 h5 h6 p div pre plaintext address blockquote center form table tbody thead tfoot tr td th caption colgroup col dl ul ol li fieldset);
696 my %InlineTags = map { $_ => 1 } qw(span abbr acronym q sub sup cite code em kbd samp strong var dfn strike b i u s tt small big nobr a font);
678697 my %NestInlineTags = map { $_ => 1 } qw(span abbr acronym q sub sup cite code em kbd samp strong var dfn strike b i u s tt small big nobr);
679698
699 # Default list of mismatched tags to track
700 my %MismatchedTags = (%BlockTags, %InlineTags);
701
680702 =head1 CONSTRUCTOR
681703
682704 =over 4
716738 =item B<css_callback>
717739
718740 Subroutine reference to be invoked when CSS data is found either as the contents of a 'style' attribute in an HTML tag, or as the contents of a <style> HTML tag.
741
742 =item B<content_callback>
743
744 Subroutine reference to be invoked when standard content between HTML tags in found.
719745
720746 =item B<fix_mismatched_tags>
721747
743769 return value of a callback. Any tag or attribute modifications made
744770 directly by a callback are still performed.
745771
772 =item B<delete_defang_content>
773
774 Normally defanged tags are turned into comments and prefixed by defang_,
775 and defanged styles are surrounded by /* ... */. If this is set to
776 true, then defanged content is deleted instead
777
746778 =item B<Debug>
747779
748780 If set, prints debugging output.
749
750 =back
751781
752782 =back
753783
761791
762792 my %Opts = @_;
763793
764 # my $Context = shift;
765
766 my ($tags_to_callback, $attribs_to_callback) = ($Opts{"tags_to_callback"}, $Opts{"attribs_to_callback"});
767 my %tags_to_callback = map { $_ => 1 } @$tags_to_callback if $tags_to_callback;
768 my %attribs_to_callback = map { $_ => 1 } @$attribs_to_callback if $attribs_to_callback;
794 my ($tags_to_callback, $attribs_to_callback, $empty_tags_to_collapse, $mismatched_tags_to_fix)
795 = @Opts{qw(tags_to_callback attribs_to_callback empty_tags_to_collapse mismatched_tags_to_fix)};
796 my %tags_to_callback;
797 %tags_to_callback = map { $_ => 1 } @$tags_to_callback if $tags_to_callback;
798 my %attribs_to_callback;
799 %attribs_to_callback = map { $_ => 1 } @$attribs_to_callback if $attribs_to_callback;
800 my %empty_tags_to_collapse;
801 %empty_tags_to_collapse = map { $_ => 1 } @$empty_tags_to_collapse if $empty_tags_to_collapse;
769802 my %mismatched_tags_to_fix = %MismatchedTags;
770 %mismatched_tags_to_fix = map { $_ => 1 } @{$Opts{'mismatched_tags_to_fix'}} if $Opts{'mismatched_tags_to_fix'};
803 %mismatched_tags_to_fix = map { $_ => 1 } @$mismatched_tags_to_fix if $mismatched_tags_to_fix;
771804
772805 my $Self = {
773806 defang_string => 'defang_',
807 defang_re => qr/^defang_/,
808 defang_default => (defined $Opts{defang_default} ? $Opts{defang_default} : DEFANG_DEFAULT),
774809 allow_double_defang => $Opts{allow_double_defang},
775810 tags_to_callback => \%tags_to_callback,
776811 tags_callback => $Opts{tags_callback},
778813 attribs_callback => $Opts{attribs_callback},
779814 url_callback => $Opts{url_callback},
780815 css_callback => $Opts{css_callback},
816 content_callback => $Opts{content_callback},
781817 mismatched_tags_to_fix => \%mismatched_tags_to_fix,
782818 fix_mismatched_tags => $Opts{fix_mismatched_tags},
783819 context => $Opts{context},
784820 opened_tags => [],
785821 opened_tags_count => {},
786 opened_nested_tags => [],
822 closed_into_block_tags => [],
823 empty_tags_to_collapse => \%empty_tags_to_collapse,
824 quiet => $Opts{quiet},
825 delete_defang_content => $Opts{delete_defang_content},
787826 Debug => $Opts{Debug},
788827 };
789828
790829 bless ($Self, $Class);
791830 return $Self;
792831 }
832
833 =item I<HTML::Defang-E<gt>new_bodyonly(%Options)>
834
835 Constructs a new HTML::Defang object that has the following
836 implicit options
837
838 =over 4
839
840 =item B<fix_mismatched_tags = 1>
841
842 =item B<delete_defang_content = 1>
843
844 =item B<tags_to_callback = [ qw(html head link body meta title bgsound) ]>
845
846 =item B<tags_callback = { ... remove all above tags and related content ... }>
847
848 =item B<url_callback = { ... explicity DEFANG_NONE to leave everything alone ... }>
849
850 =back
851
852 Basically this is a easy way to remove all html boiler plate
853 content and return only the html body content.
854
855 =cut
856
857 sub new_bodyonly {
858 return shift->new(
859 fix_mismatched_tags => 1,
860 delete_defang_content => 1,
861 tags_to_callback => [ qw(html head link body meta title bgsound) ],
862 tags_callback => sub {
863 my (undef, $Defang, $Angle, $lcTag, $IsEndTag, $AttributeHash, $AttributesEnd, $HtmlR, $OutR) = @_;
864 $$HtmlR =~ m{\G.*?(?=</title|</head)}gcis if $lcTag eq 'title' && !$IsEndTag;
865 return DEFANG_ALWAYS;
866 },
867 url_callback => sub { return DEFANG_NONE; },
868 @_
869 );
870 }
871
872 =back
873
874 =cut
793875
794876 =head1 CALLBACK METHODS
795877
10471129
10481130 =over 4
10491131
1050 =item I<defang($InputHtml)>
1132 =cut
1133
1134 =item I<defang($InputHtml, \%Opts)>
10511135
10521136 Cleans up $InputHtml of any executable code including scripting, embedded objects, applets, etc., and defang any XSS attacks.
10531137
1054 =over 4
1138 =over 4
10551139
10561140 =item B<Method parameters>
10571141
10681152 Returns the cleaned HTML. If fix_mismatched_tags is set, any tags that appear in @$mismatched_tags_to_fix that are unbalanced are automatically commented or closed.
10691153
10701154 =cut
1071
10721155 sub defang {
10731156 my $Self = shift;
10741157
10751158 my $I = shift;
1159 my $Opts = shift;
10761160
10771161 my $Debug = $Self->{Debug};
10781162
1079 my $HeaderCharset = shift;
1163 my $HeaderCharset = $Opts->{header_charset};
10801164 warn("defang HeaderCharset=$HeaderCharset") if $Debug;
1081 my $FallbackCharset = shift;
1165 my $FallbackCharset = $Opts->{fallback_charset};
10821166 warn("defang FallbackCharset=$FallbackCharset") if $Debug;
10831167
10841168 $Self->{Reentrant}++;
1169
1170 # Output buffer
1171 local $Self->{OutR} = $Opts->{add_to_existing} ? $Self->{OutR} : \(my $O = "");
1172 my $OutR = $Self->{OutR};
10851173
10861174 # Get encoded characters
10871175 # $Self->{Charset} = $Self->get_applicable_charset($_, $HeaderCharset, $FallbackCharset);
11101198 # Force byte matching everywhere (see above)
11111199 use bytes;
11121200
1201 Carp::cluck() if !defined $I;
1202
11131203 # Strip all NUL chars
11141204 $I =~ s/\0//g;
1115
1116 # Output buffer
1117 my $O = '';
11181205
11191206 # This parser uses standard /\G.../gc matching, so have to be careful
11201207 # to not reset pos() on the string
11291216 # walk to next < (testing in 5.8.8 shows .*? is faster than [^<]* or [^<]*?)
11301217 if ($I =~ m{\G(.*?)<}gcso) {
11311218
1219 my $Content = $1;
1220
1221 # Call content callback if present
1222 $Self->{content_callback}->($Self->{context}, $Self, \$Content)
1223 if $Self->{content_callback};
1224
11321225 # Everything before tag goes into the output
1133 $O .= $1;
1226 $$OutR .= $Content;
11341227
11351228 # All tags default to open/close with </>
11361229 my ($OpenAngle, $CloseAngle) = ('<', '>');
11771270 NoParseAttributes:
11781271 my $Defang = DEFANG_ALWAYS;
11791272
1180 my $TagOps = $Tags{lc $Tag};
1273 my $lcTag = lc $Tag;
1274 my $TagOps = $Tags{$lcTag};
11811275
11821276 # Process this tag
1183 if (ref $TagOps eq "CODE") {
1277 if (!exists $Self->{tags_to_callback}->{$lcTag} && ref $TagOps eq "CODE") {
11841278
11851279 warn "process_tag Found CODE reference" if $Debug;
1186 $Defang = $Self->${TagOps}(\$O, \$I, $TagOps, \$OpenAngle, $IsEndTag, $Tag, $TagTrail, \@Attributes, \$CloseAngle);
1280 $Defang = $Self->${TagOps}($OutR, \$I, $TagOps, \$OpenAngle, $IsEndTag, $lcTag, $TagTrail, \@Attributes, \$CloseAngle);
11871281
11881282 } else {
11891283
11901284 warn "process_tag Found regular tag" if $Debug;
1191 $Defang = $Self->defang_attributes(\$O, \$I, $TagOps, \$OpenAngle, $IsEndTag, $Tag, $TagTrail, \@Attributes, \$CloseAngle);
1285 $Defang = $Self->defang_attributes($OutR, \$I, $TagOps, \$OpenAngle, $IsEndTag, $lcTag, $TagTrail, \@Attributes, \$CloseAngle);
11921286
11931287 }
11941288 die "Callback reset pos on Tag=$Tag IsEndTag=$IsEndTag" if !defined pos($I);
11991293 # @Attributes can have unicode values, but we're within "use bytes", so it's flattened ok
12001294 my $TagContent = $TagTrail . join("", grep { defined } map { @$_ } @Attributes);
12011295
1202 $Defang ||= $Self->track_tags(\$O, \$I, $TagOps, \$OpenAngle, $IsEndTag, $Tag, \$TagContent)
1203 if $Self->{fix_mismatched_tags} && ($Defang != DEFANG_ALWAYS);
1296 if ($Self->{fix_mismatched_tags} && ($Defang == DEFANG_NONE)) {
1297 if (!$IsEndTag) {
1298 $Defang = $Self->open_tag(0, $OutR, \$I, $lcTag, \$TagContent);
1299 } else {
1300 $Defang = $Self->close_tag(0, $OutR, \$I, $lcTag);
1301 goto SkipOutput if $Defang == DEFANG_ALWAYS;
1302 }
1303 }
12041304
12051305 # defang unknown tags
12061306 if ($Defang != DEFANG_NONE) {
12071307 warn "defang Defanging $Tag" if $Debug;
1208 $Tag = $Self->{defang_string} . $Tag
1209 if $Self->{allow_double_defang}
1210 || (
1211 substr( $Tag, 0, length( $Self->{defang_string} ) ) ne
1212 $Self->{defang_string} );
1213 $TagContent =~ s/--//g;
1214 $Tag =~ s/--//g;
1215 $OpenAngle =~ s/^</<!--/;
1216 $CloseAngle =~ s/>$/-->/;
1308 if ($Self->{delete_defang_content}) {
1309 $OpenAngle = $IsEndTag = $Tag = $TagContent = $CloseAngle = '';
1310 } else {
1311 $Tag = $Self->{defang_string} . $Tag
1312 if $Self->{allow_double_defang} || $Tag !~ $Self->{defang_re};
1313 $TagContent =~ s/--//g;
1314 $Tag =~ s/--//g;
1315 $OpenAngle =~ s/^</<!--/;
1316 $CloseAngle =~ s/>$/-->/;
1317 }
12171318 }
12181319
12191320 # And put it all back together into the output string
1220 $O .= $OpenAngle . $IsEndTag . $Tag . $TagContent . $CloseAngle;
1221
1222 # It's a comment of some sort. We are looking for regular HTML comment, XML CDATA section and
1223 # IE conditional comments
1224 # Refer http://msdn.microsoft.com/en-us/library/ms537512.aspx for IE conditional comment information
1225 } elsif ($I =~ m{\G(!)((?:\[CDATA\[|(?:--)?\[if|--)?)}gcis) {
1321 $$OutR .= $OpenAngle . $IsEndTag . $Tag . $TagContent . $CloseAngle;
1322 SkipOutput:
1323
1324 # It's a comment of some sort. We are looking for regular HTML comment, XML CDATA section
1325 } elsif ($I =~ m{\G(!)((?:\[CDATA\[|--)?)}gcis) {
12261326
12271327 my ($Comment, $CommentDelim) = ($1, $2);
12281328 warn "defang Comment=$Comment CommentDelim=$CommentDelim" if $Debug;
1229
1329
12301330 # Find the appropriate closing delimiter
12311331 my $IsCDATA = $CommentDelim eq "[CDATA[";
12321332 my $ClosingCommentDelim = $IsCDATA ? "]]" : $CommentDelim;
1233
1234 my $EndRestartCommentsText = '';
1235 # Handle IE conditionals specially. We can have <![if ...]>, <!--[if ...]> and <!--[if ...]-->
1236 # for the third case, we just want to immediately match the -->
1237 if ($CommentDelim =~ /((?:--)?)\[if/) {
1238 my $ConditionalDelim = $1;
1239 $EndRestartCommentsText = '--' if $ConditionalDelim eq '';
1240 $ClosingCommentDelim = $CommentDelim;
1241 if ($I !~ m{\G[^\]]*\]-->}gcis) {
1242 $ClosingCommentDelim = "<![endif]$ConditionalDelim";
1243 }
1333
1334 warn "defang ClosingCommentDelim=$ClosingCommentDelim" if $Debug;
1335
1336 my ($CommentStartText, $CommentEndText) = ("--/*SC*/", "/*EC*/--");
1337
1338 # Convert to regular HTML comment
1339 if (!$Self->{delete_defang_content}) {
1340 $$OutR .= $OpenAngle . $Comment . $CommentStartText;
12441341 }
12451342
1246 warn "defang ClosingCommentDelim=$ClosingCommentDelim" if $Debug;
1247
1248 my ($CommentStartText, $CommentEndText) = ("--/*SC*/", "/*EC*/--");
1249
1250 # Convert to regular HTML comment
1251 $O .= $OpenAngle . $Comment . $CommentStartText;
1252
12531343 # Find closing comment
1254 if ($I =~ m{\G(.*?)(\Q${ClosingCommentDelim}\E!?\s*)(>)}gcis || $I =~ m{\G(.*?)(--)(>)}gcis) {
1344 if ($I =~ m{\G(.*?)(\Q$ClosingCommentDelim\E!?\s*)(>)}gcis || $I =~ m{\G(.*?)(--)(>)}gcis) {
12551345
12561346 my ( $StartTag, $CommentData, $ClosingTag, $CloseAngle ) =
12571347 ( $CommentDelim, $1, $2, $3 );
1258
1259 if ($EndRestartCommentsText && $CommentData =~ s/^(.*?)(>.*)$/$2/s) {
1260 $StartTag .= $1;
1261 }
12621348
12631349 # Strip all HTML comment markers
12641350 $StartTag =~ s/--//g;
12651351 $CommentData =~ s/--//g;
12661352 $ClosingTag =~ s/--//g;
12671353
1268 $StartTag .= $EndRestartCommentsText if $CommentData;
1269 $ClosingTag =~ s{^(<!)}{$1$EndRestartCommentsText} if $CommentData;
1270
1271 # Put it all into the output
1272 $O .= $StartTag
1273 . ($EndRestartCommentsText ? $Self->defang($CommentData) : $CommentData)
1274 . $ClosingTag
1275 . $CommentEndText
1276 . $CloseAngle;
1354 if (!$Self->{delete_defang_content}) {
1355 # Put it all into the output
1356 $$OutR .= $StartTag
1357 . $CommentData
1358 . $ClosingTag
1359 . $CommentEndText
1360 . $CloseAngle;
1361 }
12771362
12781363 # No closing comment, so we add that
12791364 } else {
12841369 $Data =~ s/--//g;
12851370
12861371 # Output
1287 $O .= $Data . $CommentEndText . ">";
1372 if (!$Self->{delete_defang_content}) {
1373 $$OutR .= $Data . $CommentEndText . ">";
1374 }
12881375
12891376 }
12901377
13031390
13041391 $Data =~ s{--}{}g;
13051392
1306 $O .= $OpenAngle . '!--' . $Processing . $Data . '-->';
1393 if (!$Self->{delete_defang_content}) {
1394 $$OutR .= $OpenAngle . '!--' . $Processing . $Data . '-->';
1395 }
13071396
13081397 }
13091398 # Some other thing starting with <, keep looking
13101399
1400 if (exists $Self->{TrackedAppendOutput}) {
1401 for (@{delete $Self->{TrackedAppendOutput}}) {
1402 $Self->open_tag(1, $OutR, \$I, $_->[0], \$_->[1], 1);
1403 }
1404 }
13111405 if (exists $Self->{AppendOutput}) {
1312 $O .= delete $Self->{AppendOutput};
1406 $$OutR .= delete $Self->{AppendOutput};
13131407 }
13141408 if (exists $Self->{DelayedAppendOutput}) {
1315 $O .= $Self->defang(delete $Self->{DelayedAppendOutput});
1409 $Self->defang(delete $Self->{DelayedAppendOutput}, { add_to_existing => 1 });
13161410 }
13171411 next;
13181412 }
13211415 warn "defang OutputRemainder" if $Debug;
13221416 $I =~ m/\G(.*)$/gcs;
13231417
1324 $O .= $1 if $1;
1418 $$OutR .= $1 if $1;
13251419
13261420 # Exit if we got here
13271421 last;
13291423
13301424 # If not a recursive call, close mismatched tags
13311425 if ($Self->{Reentrant}-- <= 1) {
1332 $Self->close_tags(\$O);
1426 $Self->close_all_tags($OutR, \$I);
13331427 }
13341428
13351429 # Turn on utf-8 flag again
1336 Encode::_utf8_on($O) if $UTF8Input;
1337
1338 return $O;
1430 Encode::_utf8_on($$OutR) if $UTF8Input;
1431
1432 return $$OutR;
13391433 }
13401434
13411435 =item I<add_to_output($String)>
13591453 =back
13601454
13611455 =cut
1362
1363 # Callbacks call this method
13641456 sub add_to_output {
1457 # Callbacks call this method
13651458 my $Self = shift;
13661459 $Self->{AppendOutput} = '' if !defined $Self->{AppendOutput};
13671460 $Self->{AppendOutput} .= shift;
13731466 $Self->{DelayedAppendOutput} .= shift;
13741467 }
13751468
1469 sub track_and_add_tag_to_output {
1470 my $Self = shift;
1471 push @{$Self->{TrackedAppendOutput}}, shift;
1472 }
1473
13761474 =item B<INTERNAL METHODS>
13771475
13781476 Generally these methods never need to be called by users of the class, because they'll be called internally as the appropriate tags are
13801478
13811479 =over 4
13821480
1383 =item I<defang_script($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle)>
1481 =cut
1482
1483 =item I<defang_script_tag($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle)>
13841484
13851485 This method is invoked when a <script> tag is parsed. Defangs the <script> opening tag, and any closing tag. Any scripting content is also commented out, so browsers don't display them.
13861486
14351535 =back
14361536
14371537 =cut
1438
1439 sub defang_script {
1538 sub defang_script_tag {
14401539 my $Self = shift;
1441 my ($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle) = @_;
1540 my ($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $lcTag, $TagTrail, $Attributes, $CloseAngle) = @_;
14421541 warn "defang_script Processing <script> tag" if $Self->{Debug};
14431542
14441543 if (!$IsEndTag) {
14451544
1446 # If we just parsed a starting <script> tag, code better be commented. If
1447 # not, we attach comments around the code.
1545 # If we just parsed a starting <script> tag, find up to end tag
1546 # There's all sort of possible mess around this:
1547 # </script<foo> - not really an end tag
1548 # </script foo="bar > yes, still in a attribute"> - a valid end tag
1549 # For weird cases, we end script tag early and end up defanging script
1550 # content as HTML content, which is still safe
14481551 if ($$HtmlR =~ m{\G(.*?)(?=</script\b)}gcsi) {
14491552 my $ScriptTagContents = $1;
14501553 warn "defang_script ScriptTagContents $ScriptTagContents" if $Self->{Debug};
1451 $ScriptTagContents =~ s/^(\s*)(<!--)?(.*?)(-->)?(\s*)$/$1<!-- $3 -->$5/s;
1452 $Self->add_to_output($ScriptTagContents);
1453
1554 if (!$Self->{delete_defang_content}) {
1555 $ScriptTagContents =~ s/<!--|-->|--//g;
1556 $ScriptTagContents = "<!-- " . $ScriptTagContents . " -->";
1557 $Self->add_to_output($ScriptTagContents);
1558 }
14541559 }
14551560 }
14561561
14581563 return DEFANG_ALWAYS;
14591564 }
14601565
1461 =item I<defang_style($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle, $IsAttr)>
1462
1463 Builds a list of selectors and declarations from HTML style tags as well as style attributes in HTML tags and calls defang_stylerule() to do the actual defanging.
1464
1465 Returns 0 to indicate that style tags must not be defanged.
1466
1467 =over 4
1468
1469 =item B<Method parameters>
1470
1471 =over 4
1472
1473 =item I<$IsAttr>
1474
1475 Whether we are currently parsing a style attribute or style tag. $IsAttr will be true if we are currently parsing a style attribute.
1476
1477 =back
1478
1479 For a description of other parameters, see documentation of defang_script() method
1480
1481 =back
1482
1483 =cut
1484
1485 sub defang_style {
1486
1487 my ($Self, $OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle, $IsAttr) = @_;
1488 my $lcTag = lc $Tag;
1489
1490 warn "defang_style Tag=$Tag IsEndTag=$IsEndTag IsAttr=$IsAttr" if $Self->{Debug};
1566 sub defang_style_tag {
1567 my ($Self, $OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $lcTag, $TagTrail, $Attributes, $CloseAngle) = @_;
1568
1569 warn "defang_style_tag Tag=$lcTag IsEndTag=$IsEndTag" if $Self->{Debug};
1570
1571 # Defang attributes
1572 my $Defang = $Self->defang_attributes($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $lcTag, $TagTrail, $Attributes, $CloseAngle);
14911573
14921574 # Nothing to do if end tag
1493 return DEFANG_NONE if !$IsAttr && $IsEndTag;
1575 return $Defang if $IsEndTag;
14941576
14951577 # Do all style work in byte mode
14961578 use bytes;
14991581 my $ClosingStyleTagPresent = 1;
15001582
15011583 for ($$HtmlR) {
1502
1503 if (!$IsAttr) {
1504 if (m{\G(.*?)(?=</style\b)}gcis) {
1505 $Content = $1;
1506
1507 # No ending style tag
1508 } elsif (m{\G([^<]*)}gcis) {
1509 $Content = $1;
1510 $ClosingStyleTagPresent = 0;
1511 }
1512 # Its a style attribute
1513 } else {
1514 # Avoid undef warning for style attr with no value. eg <tag style>
1515 $Content = defined($_) ? $_ : '';
1516 }
1517 }
1518
1584 if (m{\G(.*?)(?=</style\b)}gcis) {
1585 $Content = $1;
1586
1587 # No ending style tag
1588 } elsif (m{\G([^<]*)}gcis) {
1589 $Content = $1;
1590 $ClosingStyleTagPresent = 0;
1591 }
1592 }
1593
15191594 # Handle any wrapping HTML comments. If no comments, we add
15201595 my ($OpeningHtmlComment, $ClosingHtmlComment) = ('', '');
1521 if (!$IsAttr) {
1522 $OpeningHtmlComment = $Content =~ s{^(\s*<!--)}{} ? $1 : "<!--";
1523 $ClosingHtmlComment = $Content =~ s{(-->\s*)$}{} ? $1 : "-->";
1524 }
1596 $OpeningHtmlComment = $Content =~ s{^(\s*<!--)}{} ? $1 . " " : "<!-- ";
1597 $ClosingHtmlComment = $Content =~ s{(-->\s*)$}{} ? " " . $1 : " -->";
1598
1599 # Check for large bogus style data with mostly HTML tags and blat it
1600 if (length $Content > 16384) {
1601 my $TagCount = 0;
1602 $TagCount++ while $Content =~ m{</?\w+\b[^>]*>}g;
1603 if ($TagCount > length($Content)/256) {
1604 $Content = '';
1605 }
1606 }
1607
1608 my $StyleOut = $Self->defang_style_text($Content, $lcTag, 0, undef, $HtmlR, $OutR);
1609
1610 $Self->add_to_output($OpeningHtmlComment . $StyleOut . $ClosingHtmlComment);
1611 $Self->add_to_output("</style>") if !$ClosingStyleTagPresent;
1612
1613 return $Defang;
1614 }
1615
1616 =item I<defang_style_text($Content, $lcTag, $IsAttr, $AttributeHash, $HtmlR, $OutR)>
1617
1618 Defang some raw css data and return the defanged content
1619
1620 =over 4
1621
1622 =item B<Method parameters>
1623
1624 =over 4
1625
1626 =item I<$Content>
1627
1628 The input style string that is defanged.
1629
1630 =item I<$IsAttr>
1631
1632 True if $Content is from an attribute, otherwise from a <style> block
1633
1634 =back
1635
1636 =back
1637
1638 =cut
1639 sub defang_style_text {
1640 my ($Self, $Content, $lcTag, $IsAttr, $AttributeHash, $HtmlR, $OutR) = @_;
15251641
15261642 # Clean up all comments, expand character escapes and such
15271643 $Self->cleanup_style($Content, $IsAttr);
15321648 warn "defang_style Naked=$Naked" if $Self->{Debug};
15331649
15341650 # And suitably change the regex to match the data
1535 my $SelectorRuleRE = $Naked ? qr/(\s*)()()()($StyleRules)()(\s*)/o :
1536 qr/(\s*)((?:$StyleSelectors)?)(\s*)(\{)($StyleRules)(\})(\s*)/o;
1537
1538 my (@Selectors, @SelectorRules, %ExtraData );
1651 my $SelectorRuleRE = $Naked ? $RECStyleNaked : $RECStyleSelected;
1652
1653 my (@Selectors, @SelectorRules, @ExtraData, @InMedia);
15391654
15401655 # Now we parse the selectors and declarations
1541 while ($Content =~ m{\G.*?$SelectorRuleRE}sgc) {
1542 my ($Selector, $SelectorRule) = ($2, $5);
1543 last if $Selector eq '' && $SelectorRule eq '';
1544 push @Selectors, $Selector;
1545 push @SelectorRules, $SelectorRule;
1546 warn "defang_style Selector=$Selector" if $Self->{Debug};
1547 warn "defang_style SelectorRule=$SelectorRule" if $Self->{Debug};
1548 $ExtraData{$Selector} = [ $1, $3, $4, $6, $7];
1656 while (1) {
1657 if ($Content =~ m{$RECStyleMediaSelector}sgco) {
1658 push @InMedia, $2;
1659 push @Selectors, $2;
1660 push @SelectorRules, "";
1661 push @ExtraData, [ $1, "", $3, "", $4 ];
1662 } elsif (@InMedia && $Content =~ m{\G(\s*)(\})(\s*)}sgc) {
1663 pop @InMedia;
1664 push @Selectors, "";
1665 push @SelectorRules, "";
1666 push @ExtraData, [ $1, "", "", $2, $3 ];
1667 } elsif ($Content =~ m{$SelectorRuleRE}sgc) {
1668 my ($Selector, $SelectorRule) = ($2, $5);
1669 last if $Selector eq '' && $SelectorRule =~ /^[;\s]*$/;
1670 $Selector = join("\000", @InMedia, $Selector) if @InMedia;
1671 push @Selectors, $Selector;
1672 push @SelectorRules, $SelectorRule;
1673 push @ExtraData, [ $1, $3, $4, $6, $7];
1674 warn "defang_style Selector=$Selector" if $Self->{Debug};
1675 warn "defang_style SelectorRule=$SelectorRule" if $Self->{Debug};
1676
1677 # Just a large bunch of selectors and no rules, suck up and discard
1678 } elsif (!$Naked && $Content =~ m{\G\s*$StyleSelectors\s*$}sgc) {
1679 # Looks like an html tag, suck up and discard
1680 } elsif (!$Naked && $Content =~ m{\G(?:</?$TagNameRE(?:\s[^>\{\}]*)?>\s*)+}sgc) {
1681
1682 # If content didn't match a rule, suck up whitespace
1683 } elsif ($Content =~ m{\G\s+}sgc) {
1684 # Or any non-whitespace, but try and sync to <...> tags
1685 } elsif ($Content =~ m{\G[^\s<]+}sgc || $Content =~ m{\G<+}sgc) {
1686
1687 # Nothing matched, must be at end
1688 } else { last; }
15491689 }
15501690
15511691 # Check declaration elements for defanging
1552 $Self->defang_stylerule(\@Selectors, \@SelectorRules, $lcTag, $IsAttr, $HtmlR, $OutR);
1692 $Self->defang_stylerule(\@Selectors, \@SelectorRules, $lcTag, $IsAttr, $AttributeHash, $HtmlR, $OutR);
15531693
15541694 my $StyleOut = "";
15551695
15571697 foreach my $Selector (@Selectors) {
15581698
15591699 my $SelectorRule = shift @SelectorRules;
1560 my $Spaces = $ExtraData{$Selector};
1700 my $Spaces = shift @ExtraData;
15611701 my ($BeforeSelector, $AfterSelector, $OpenBrace, $CloseBrace, $AfterRule) = @$Spaces if $Spaces;
1562 ($BeforeSelector, $AfterSelector, $AfterRule) = ("", " ", "\n") unless $ExtraData{$Selector};
1702 ($BeforeSelector, $AfterSelector, $AfterRule) = ("", " ", "\n") unless $Spaces;
15631703 ($OpenBrace, $CloseBrace) = ("{", "}") if !$Spaces && !$IsAttr;
15641704
15651705 # Put back the rule together
15661706 if (defined($Selector)) {
15671707 $StyleOut .= $BeforeSelector if defined($BeforeSelector);
1568 $StyleOut .= $Selector;
1708 $StyleOut .= $Selector =~ /\000/ ? (reverse split /\000/, $Selector)[0] : $Selector;
15691709 $StyleOut .= $AfterSelector if defined($AfterSelector);
15701710 $StyleOut .= $OpenBrace if defined($OpenBrace);
15711711 $StyleOut .= $SelectorRule if defined($SelectorRule);
15771717
15781718 warn "defang_style StyleOut=$StyleOut" if $Self->{Debug};
15791719
1580 if ($IsAttr) {
1581 $$HtmlR = $StyleOut;
1582
1583 } else {
1584 $Self->add_to_output($OpeningHtmlComment . $StyleOut . $ClosingHtmlComment);
1585 $Self->add_to_output("</style>") if !$ClosingStyleTagPresent;
1586 }
1587
1588 # We don't want <style> tags to be defanged
1589 return DEFANG_NONE;
1720 return $StyleOut;
15901721 }
15911722
15921723 =item I<cleanup_style($StyleString)>
16081739 =back
16091740
16101741 =cut
1611
16121742 sub cleanup_style {
16131743 my $Self = shift;
16141744
16491779
16501780 }
16511781
1652 =item I<defang_stylerule($SelectorsIn, $StyleRules, $lcTag, $IsAttr, $HtmlR, $OutR)>
1782 =item I<defang_stylerule($SelectorsIn, $StyleRules, $lcTag, $IsAttr, $AttributeHash, $HtmlR, $OutR)>
16531783
16541784 Defangs style data.
16551785
16881818 =back
16891819
16901820 =cut
1691
16921821 sub defang_stylerule {
1693
1694 my ($Self, $SelectorsIn, $StyleRules, $lcTag, $IsAttr, $HtmlR, $OutR) = @_;
1822 my ($Self, $SelectorsIn, $StyleRules, $lcTag, $IsAttr, $AttributeHash, $HtmlR, $OutR) = @_;
16951823
16961824 my (@SelectorStyleKeyValues, %SelectorStyleKeyExtraData);
16971825
17101838 warn "defang_stylerule Key=$Key Value=$Value Separator=$Separator ValueEnd=$ValueEnd" if $Self->{Debug};
17111839 # Store everything except style property and value in a hash
17121840 $StyleKeyExtraData{lc $Key} = [$KeyPilot, $Separator, $QuoteStart, $QuoteEnd, $ValueEnd, $ValueTrail];
1713 my $DefangStyleRule = DEFANG_DEFAULT;
1841 my $DefangStyleRule = $Self->{defang_default};
17141842
17151843 # If the style value has a URL in it and URL callback has been supplied, make a url_callback
1716 if ($Self->{url_callback} && $Value =~ m/\s*url\(\s*((?:['"])?)(.*?)\1\s*\)/i) {
1717 my ($UrlOrig, $Url) = ($2, $2) if $2;
1718 warn "defang_stylerule Url found in style property value. Url=$Url" if $Self->{Debug};
1719 my $lcAttrKey = $IsAttr ? "style" : undef;
1720 $DefangStyleRule = $Self->{url_callback}->($Self->{context}, $Self, $lcTag, $lcAttrKey, \$Url, undef, $HtmlR, $OutR) if $Url;
1721 # Save back any changes
1722 warn "defang_stylerule After URL callback, Value=$Value DefangStyleRule=$DefangStyleRule" if $Self->{Debug};
1723 $Value =~ s{\Q$UrlOrig\E}{$Url} if $UrlOrig;
1844 if ($Self->{url_callback}) {
1845 our $StartPos = 0;
1846 while ($Value =~ m/\G\s*url\(\s*((?:['"])?)(?{ $StartPos = pos; })(.*?)\1\s*\)/gci) {
1847 my ($UrlOrig, $Url) = $2 ? ($2, $2) : ('', '');
1848 my $EndPos = pos($Value);
1849 warn "defang_stylerule Url found in style property value. Url=$Url" if $Self->{Debug};
1850 my $lcAttrKey = $IsAttr ? "style" : undef;
1851 $DefangStyleRule = $Self->{url_callback}->($Self->{context}, $Self, $lcTag, $lcAttrKey, \$Url, $AttributeHash, $HtmlR, $OutR, lc $Key) if $Url;
1852 # Save back any changes
1853 warn "defang_stylerule After URL callback, Value=$Value DefangStyleRule=$DefangStyleRule" if $Self->{Debug};
1854 if ($UrlOrig) {
1855 substr($Value, $StartPos, length($UrlOrig), $Url);
1856 pos($Value) = $EndPos - length($UrlOrig) + length($Url);
1857 }
1858 }
17241859 }
17251860
17261861 # Save the style property, value and defang flag
17551890 for (my $k = 0; $k < @$KeyValueRules; $k++) {
17561891 my ($Key, $Value, $Defang) = @{$KeyValueRules->[$k]};
17571892
1758 my $v = $ExtraData->{lc $Key};
1759 my ($KeyPilot, $Separator, $QuoteStart, $QuoteEnd, $ValueEnd, $ValueTrail) = @{$v || []};
1760
1893 my $v = ($ExtraData->{lc $Key} ||= []);
1894 my ($KeyPilot, $Separator, $QuoteStart, $QuoteEnd, $ValueEnd, $ValueTrail) = @$v;
1895
1896 # Always need a separator
1897 $v->[1] = defined $v->[1] ? $v->[1] : ':';
17611898 # If an intermediate style property-value pair doesn't have a terminating semi-colon, add it
1762 if ($k > 0 && !$v) {
1763 my $PreviousRule = $KeyValueRules->[$k - 1];
1764 my $PreviousKey = $PreviousRule->[0];
1765 my $PrevExtra = $ExtraData->{lc $PreviousKey};
1766 $ExtraData->{lc $PreviousKey}->[4] .= ";" if defined($PrevExtra->[4]) && $PrevExtra->[4] !~ m/;/;
1767 $ExtraData->{lc $Key}->[1] = ":";
1899 if ($k < @$KeyValueRules - 1) {
1900 $v->[4] .= ";" if !defined $v->[4] || $v->[4] !~ m/;/;
17681901 }
17691902
17701903 }
17891922 my ($Key, $Value, $Defang) = @$KeyValueRule;
17901923 my $v = $ExtraData->{lc $Key};
17911924 my ($KeyPilot, $Separator, $QuoteStart, $QuoteEnd, $ValueEnd, $ValueTrail) = @{$v || []};
1792 ($Separator, $ValueEnd, $ValueTrail) = (":", ";", " ") unless $v;
1793
1925
17941926 # Flag to defang if a url, expression or unallowed character found
17951927 if ($Defang == DEFANG_DEFAULT) {
17961928 $Defang = $Value =~ m{^\s*[a-z0-9%!"'`:()#\s.,\/+-]+\s*;?\s*$}i ? DEFANG_NONE : DEFANG_ALWAYS;
1797 $Defang = $Value =~ m{^\s*url\s*\(}i ? DEFANG_ALWAYS : $Defang;
1798 $Defang = $Value =~ m{^\s*expression\s*\(}i ? DEFANG_ALWAYS : $Defang;
1929 $Defang = $Value =~ m{\burl\s*\(}i ? DEFANG_ALWAYS : $Defang;
1930 $Defang = $Value =~ m{\bexpression\s*\(}i ? DEFANG_ALWAYS : $Defang;
17991931 }
18001932
18011933 ($KeyPilot, $Key, $Separator, $QuoteStart, $Value, $QuoteEnd, $ValueEnd, $ValueTrail) =
18031935 ($KeyPilot, $Key, $Separator, $QuoteStart, $Value, $QuoteEnd, $ValueEnd, $ValueTrail);
18041936
18051937 # Comment out the style property-value pair if $Defang
1806 $Key = $Defang != DEFANG_NONE ? "/*" . $Key : $Key;
1807 $ValueEnd = $Defang != DEFANG_NONE ? $ValueEnd . "*/" : $ValueEnd;
1938 my $CommentDefang = 0;
1939 if ($Defang != DEFANG_NONE) {
1940 if ($Self->{delete_defang_content}) {
1941 $KeyPilot = $Key = $Separator = $QuoteStart = $Value = $QuoteEnd = $ValueEnd = $ValueTrail = '';
1942 } else {
1943 $CommentDefang = 1;
1944 }
1945 }
18081946
18091947 # Put the rule together back
18101948 if (defined($Key)) {
1811 $Rule .= join "", $KeyPilot, $Key, $Separator, $QuoteStart, $Value, $QuoteEnd, $ValueEnd, $ValueTrail;
1949 my $RuleContent = join "", $Key, $Separator, $QuoteStart, $Value, $QuoteEnd, $ValueEnd;
1950 if ($CommentDefang) {
1951 # Strip any mismatched comment markers, then add our own
1952 $RuleContent =~ s{/\*}{}g;
1953 $RuleContent =~ s{\*/}{}g;
1954 $RuleContent = "/*" . $RuleContent . "*/";
1955 }
1956 $Rule .= join "", $KeyPilot, $RuleContent, $ValueTrail;
18121957 }
18131958
18141959 warn "defang_stylerule Rule=$Rule" if $Self->{Debug};
18321977
18331978 =item B<Method parameters>
18341979
1835 For a description of the method parameters, see documentation of defang_script() method
1980 For a description of the method parameters, see documentation of defang_script_tag() method
18361981
18371982 =back
18381983
18391984 =cut
1840
18411985 sub defang_attributes {
1842 my ($Self, $OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle) = @_;
1843 my $lcTag = lc $Tag;
1986 my ($Self, $OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $lcTag, $TagTrail, $Attributes, $CloseAngle) = @_;
18441987
18451988 my $Debug = $Self->{Debug};
1989
1990 my $DefangTag = $Self->{defang_default};
1991 my ($DefangTagUrlOverride, $DefangTagAttrOverride);
18461992
18471993 # Create a key -> \value mapping of all attributes up front
18481994 # so we have a complete hash for each callback
1849 my %AttributeHash = map { lc($_->[0]) => \$_->[4] } @$Attributes;
1995 my %AttributeHash;
1996 for my $Attr (@$Attributes) {
1997 my ($AttrKey, $AttrValR) = ($Attr->[0], \$Attr->[4]);
1998
1999 my $lcAttrKey = $Attr->[7] = defined $AttrKey ? lc($AttrKey) : "";
2000
2001 # Get the attribute value cleaned up
2002 $$AttrValR = $Self->cleanup_attribute($$AttrValR);
2003 warn "defang_attributes cleaned AttrVal=$$AttrValR" if $Debug;
2004
2005 $AttributeHash{$lcAttrKey} = $AttrValR;
2006 }
2007
2008 # Callback if the tag is in @$tags_to_callback
2009 if (exists($Self->{tags_to_callback}->{$lcTag})) {
2010 warn "defang_attributes Calling tags_callback for $lcTag" if $Debug;
2011 $DefangTag = $Self->{tags_callback}->($Self->{context}, $Self, $OpenAngle, $lcTag, $IsEndTag, \%AttributeHash, $CloseAngle, $HtmlR, $OutR);
2012 }
18502013
18512014 # Now process each attribute
18522015 foreach my $Attr (@$Attributes) {
1853
1854 # We get the key and value of the attribute
1855 my ($AttrKey, $AttrValR) = ($Attr->[0], \$Attr->[4]);
1856 my $lcAttrKey = lc $AttrKey;
1857 warn "defang_attributes Tag=$Tag AttrKey=$AttrKey AttrVal=$$AttrValR" if $Debug;
1858
1859 # Get the attribute value cleaned up
1860 ($$AttrValR, my $AttrValStripped) = $Self->cleanup_attribute($Attr, $AttrKey, $$AttrValR);
1861 warn "defang_attributes AttrValStripped=$AttrValStripped" if $Debug;
2016 my ($lcAttrKey, $AttrKey, $AttrValR) = ($Attr->[7], $Attr->[0], \$Attr->[4]);
2017
2018 warn "defang_attributes Tag=$lcTag lcAttrKey=$lcAttrKey AttrVal=$$AttrValR" if $Debug;
18622019
18632020 my $AttribRule = "";
1864 if (ref($Tags{$lcTag})) {
2021 if (ref($Tags{$lcTag}) eq 'HASH') {
18652022 $AttribRule = $Tags{$lcTag}{$lcAttrKey};
18662023 }
18672024
1868 my $DefangAttrib = DEFANG_DEFAULT;
1869
1870 $AttribRule = $CommonAttributes{$lcAttrKey} unless $AttribRule;
2025 my $DefangAttrib = $Self->{defang_default};
2026
2027 $AttribRule ||= $CommonAttributes{$lcAttrKey};
18712028 warn "defang_attributes AttribRule=$AttribRule" if $Debug;
18722029
18732030 # If this is a URL type $AttrKey and URL callback method is supplied, make a url_callback
18742031 if ($Self->{url_callback} && $AttribRule && exists($UrlRules{$AttribRule})) {
18752032 warn "defang_attributes Making URL callback" if $Debug;
1876 $DefangAttrib = $Self->{url_callback}->($Self->{context}, $Self, $lcTag, $lcAttrKey, $AttrValR, \%AttributeHash, $HtmlR, $OutR);
2033 ($DefangAttrib, $DefangTagUrlOverride) = $Self->{url_callback}->($Self->{context}, $Self, $lcTag, $lcAttrKey, $AttrValR, \%AttributeHash, $HtmlR, $OutR, { defang_attrib => $DefangAttrib, attrib_rule => $AttribRule });
18772034 die "url_callback reset" if !defined pos($$HtmlR);
18782035 }
18792036
18802037 # We have a style attribute, so we call defang_style
18812038 if ($lcAttrKey eq "style") {
18822039 warn "defang_attributes Found style attribute, calling defang_style" if $Debug;
1883 $Self->defang_style($OutR, $AttrValR, $TagOps, $OpenAngle, $IsEndTag, $lcTag, $TagTrail, $Attributes, $CloseAngle, 1);
2040 $$AttrValR = $Self->defang_style_text($$AttrValR, $lcTag, 1, \%AttributeHash, $HtmlR, $OutR);
18842041 }
18852042
18862043 # If a attribute callback is supplied and its interested in this attribute, we make a attribs_callback
18872044 if ($Self->{attribs_callback} && exists($Self->{attribs_to_callback}->{$lcAttrKey})) {
1888 warn "defang_attributes Making attribute callback for Tag=$Tag AttrKey=$AttrKey" if $Debug;
1889 my $DefangResult = $Self->{attribs_callback}->($Self->{context}, $Self, $lcTag, $lcAttrKey, $AttrValR, $HtmlR, $OutR);
2045 warn "defang_attributes Making attribute callback for Tag=$lcTag AttrKey=$AttrKey" if $Debug;
2046 (my $DefangResult, $DefangTagAttrOverride) = $Self->{attribs_callback}->($Self->{context}, $Self, $lcTag, $lcAttrKey, $AttrValR, $HtmlR, $OutR);
18902047 # Only use new result if not already DEFANG_ALWAYS from url_callback
18912048 $DefangAttrib = $DefangResult if $DefangAttrib != DEFANG_ALWAYS;
18922049 }
18932050
1894 if (($DefangAttrib == DEFANG_DEFAULT) && $AttribRule) {
1895 my $Rule = $Rules{$AttribRule};
1896 warn "defang_attributes AttribRule=$AttribRule Rule=$Rule" if $Debug;
1897
1898 # We whitelist the attribute if the value matches the rule
1899 if (ref($Rule) eq "Regexp") {
1900 $DefangAttrib = ($AttrValStripped =~ $Rule) ? DEFANG_NONE : DEFANG_ALWAYS;
1901 }
1902
1903 # Hack. Ref to array is a blacklist regexp
1904 if (ref($Rule) eq "ARRAY") {
1905 $DefangAttrib = ($AttrValStripped =~ $Rule->[0]) ? DEFANG_ALWAYS : DEFANG_NONE;
1906 }
1907
1908 } elsif (!$AttribRule) {
1909 $DefangAttrib = DEFANG_ALWAYS;
1910 }
1911
1912 warn "defang_attributes DefangAttrib=$DefangAttrib" if $Debug;
2051 # Check if the final attribute value needs defanging
2052 $DefangAttrib = $Self->defang_attribute_value($AttrValR, $AttribRule, $DefangAttrib);
19132053
19142054 # Store the attribute defang flag
1915 push @$Attr, $DefangAttrib if $DefangAttrib != DEFANG_NONE;
1916
1917 }
1918
1919 my $DefangTag = DEFANG_DEFAULT;
1920
1921 # Callback if the tag is in @$tags_to_callback
1922 if (exists($Self->{tags_to_callback}->{$lcTag})) {
1923 warn "defang_attributes Calling tags_callback for $Tag" if $Debug;
1924 $DefangTag = $Self->{tags_callback}->($Self->{context}, $Self, $OpenAngle, $lcTag, $IsEndTag, \%AttributeHash, $CloseAngle, $HtmlR, $OutR);
2055 $Attr->[8] = $DefangAttrib;
2056
19252057 }
19262058
19272059 my @OutputAttributes;
19282060
19292061 foreach my $Attr (@$Attributes) {
19302062
1931 my $lcAttr = lc $Attr->[0];
2063 my $lcAttr = $Attr->[7];
19322064
19332065 # If the attribute is deleted don't output it
19342066 unless ($AttributeHash{$lcAttr}) {
1935 warn "defang_attributes Marking attribute $Attr->[0] for deletion" if $Debug;
2067 warn "defang_attributes Marking attribute $lcAttr for deletion" if $Debug;
19362068 next;
19372069 }
19382070
19392071 # And we attach the defang string here, if the attribute should be defanged
19402072 # (attribute could be undef for buggy html, eg <ahref=blah>)
1941 $Attr->[0] = $Self->{defang_string}
1942 . ( $Attr->[0] || '' )
1943 if defined($Attr->[7]) && $Attr->[7] != DEFANG_NONE
1944 && (
1945 $Self->{allow_double_defang}
1946 || (
1947 substr( ( $Attr->[0] || '' ),
1948 0, length( $Self->{defang_string} ) ) ne $Self->{defang_string}
1949 )
1950 );
1951 # Set defang value to undef, or this value will appear in the output
1952 $Attr->[7] = undef;
2073 if ($Attr->[8] != DEFANG_NONE) {
2074 if ($Self->{delete_defang_content}) {
2075 @$Attr = ('') x 7;
2076 } else {
2077 $Attr->[0] = $Self->{defang_string} . ($Attr->[0] || '')
2078 if $Self->{allow_double_defang} || ($Attr->[0] || '') !~ $Self->{defang_re};
2079 }
2080 }
2081
2082 # Remove non-attribute values so they're not in the output
2083 splice(@$Attr, 7);
19532084
19542085 # Requote specials in attribute value
1955 my $QuoteRe = $QuoteRe{$Attr->[3]} || $QuoteRe{""};
1956 $Attr->[4] =~ s/$QuoteRe/'&'.$CharToEntity{$1}.';'/eg
1957 if defined($Attr->[4]);
2086 if (defined $Attr->[4]) {
2087 my $QuoteRe = $QuoteRe{$Attr->[3]} || $QuoteRe{""};
2088 $Attr->[4] =~ s/$CtrlChars//go; # strip ctrls
2089 $Attr->[4] =~ s/$QuoteRe/'&'.$CharToEntity{$1}.';'/eg;
2090 }
19582091
19592092 # Add to attributes to output
19602093 push @OutputAttributes, $Attr;
19652098
19662099 # Append all remaining attribute keys (which must have been newly added attributes by
19672100 # the callback)and values in no particular order
2101 my $QuoteRe = $QuoteRe{'"'};
19682102 while (my ($Key,$Value) = each %AttributeHash ) {
19692103 my $Attr = [" " . $Key, "", "=", '"', $$Value, '"', ""];
19702104 if (defined $Attr->[4]) {
1971 $Attr->[4] =~ s/(['"<>&])/'&'.$CharToEntity{$1}.';'/eg
2105 $Attr->[4] =~ s/$CtrlChars//g; # strip ctrls
2106 $Attr->[4] =~ s/$QuoteRe/'&'.$CharToEntity{$1}.';'/eg
19722107 } else {
19732108 @$Attr[2..6] = (undef) x 5;
19742109 }
19822117 if ($DefangTag == DEFANG_DEFAULT && (my $TagOps = $Tags{$lcTag})) {
19832118 $DefangTag = DEFANG_NONE;
19842119 }
1985
2120 # Unless we have overrides
2121 $DefangTag = DEFANG_ALWAYS
2122 if defined($DefangTagUrlOverride) && $DefangTagUrlOverride == DEFANG_ALWAYS;
2123 $DefangTag = DEFANG_ALWAYS
2124 if defined($DefangTagAttrOverride) && $DefangTagAttrOverride == DEFANG_ALWAYS;
2125
19862126 return $DefangTag;
19872127 }
19882128
1989 sub track_tags {
1990 my ($Self, $OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagContentR) = @_;
1991 my $lcTag = lc $Tag;
1992
1993 my ($OpenedTags, $OpenedTagsCount, $OpenedNestedTags)
1994 = @$Self{qw(opened_tags opened_tags_count opened_nested_tags)};
1995 my $IsTagToFix = $Self->{mismatched_tags_to_fix}->{$lcTag};
2129 sub defang_attribute_value {
2130 my ($Self, $AttrValR, $AttribRule, $DefangAttrib) = @_;
2131
2132 my $Debug = $Self->{Debug};
2133
2134 my $AttrValStripped = $Self->strip_attribute($$AttrValR);
2135
2136 warn "defang_attribute_value AttrVal=$$AttrValR AttrValStripped=$AttrValStripped" if $Debug;
2137
2138 if (($DefangAttrib == DEFANG_DEFAULT) && $AttribRule) {
2139 my $Rule = $Rules{$AttribRule};
2140 warn "defang_attributes AttribRule=$AttribRule Rule=$Rule" if $Debug;
2141
2142 # We whitelist the attribute if the value matches the rule
2143 if (ref($Rule) eq "Regexp") {
2144 $DefangAttrib = ($AttrValStripped =~ $Rule) ? DEFANG_NONE : DEFANG_ALWAYS;
2145 }
2146
2147 # Hack. Ref to array is a blacklist regexp
2148 if (ref($Rule) eq "ARRAY") {
2149 $DefangAttrib = ($AttrValStripped =~ $Rule->[0]) ? DEFANG_ALWAYS : DEFANG_NONE;
2150 }
2151
2152 } elsif (!$AttribRule && $DefangAttrib != DEFANG_NONE) {
2153 $DefangAttrib = DEFANG_ALWAYS;
2154 }
2155
2156 warn "defang_attribute_value DefangAttrib=$DefangAttrib" if $Debug;
2157
2158 return $DefangAttrib;
2159 }
2160
2161 sub track_implicit_tags {
2162 my ($Self, $OutR, $HtmlR, $lcTag, $IsEndTag) = @_;
2163
2164 my $OpenedTags = $Self->{opened_tags};
2165 return if !@$OpenedTags;
2166
2167 # If just closing the last tag, nothing to do
2168 my $LastTag = $OpenedTags->[-1]->[0];
2169
2170 # Are we expecting a particular tag based on last open tag?
2171 if (my $ImplicitTags = $ImplicitOpenTags{$LastTag}) {
2172
2173 # We didn't get a tag we were expecting (eg <table><div> rather
2174 # than <table><tbody><tr><td><div>), so insert opening tags recursively
2175 $LastTag = $lcTag;
2176 while ($ImplicitTags && (!$ImplicitTags->{$LastTag} || $IsEndTag)) {
2177 my $Tag = $ImplicitTags->{default};
2178 # Don't insert implicit tag if it's actually the one we actually just parsed
2179 last if !$IsEndTag && $Tag eq $lcTag;
2180 $Self->open_tag(1, $OutR, $HtmlR, $Tag, \"");
2181 $$OutR .= "<!-- $Tag implicit open due to $LastTag -->" unless $Self->{quiet};
2182 $LastTag = $Tag;
2183 $ImplicitTags = $ImplicitOpenTags{$LastTag};
2184 }
2185 }
2186 }
2187
2188 sub track_in_to_block_tags {
2189 my ($Self, $OutR, $HtmlR, $lcTag) = @_;
19962190
19972191 # If we've got a block tag, then close any inline tags
19982192 # before the block tag. We'll re-opened them again below
1999 if ($BlockTags{$lcTag}) {
2193 if ($BlockTags{$lcTag} && !$TableTags{$lcTag}) {
2194 my ($OpenedTags, $ClosedIntoBlockTags, $Quiet)
2195 = @$Self{qw(opened_tags closed_into_block_tags quiet)};
2196
20002197 while (@$OpenedTags && $InlineTags{$OpenedTags->[-1]->[0]}) {
2001 push @$OpenedNestedTags, my $POTD = $OpenedTags->[-1];
2002 $Self->track_tag(1, $POTD->[0]);
2003 $$OutR .= "<!-- close inline tag into block --></$POTD->[0]>";
2004 }
2005 }
2006
2007 # Track browser implicitly opened tags
2008 if (@$OpenedTags) {
2009
2010 # If just closing the last tag, nothing to do
2011 my $LastTag = $OpenedTags->[-1]->[0];
2012 if (!$IsEndTag || $lcTag ne $LastTag) {
2013
2014 # Are we expecting a particular tag based on last open tag?
2015 if (my $ImplicitTags = $ImplicitOpenTags{$LastTag}) {
2016
2017 # We didn't get a tag we were expecting (eg <table><div> rather
2018 # than <table><tbody><tr><td><div>), so insert opening tags recursively
2019 $LastTag = $lcTag;
2020 while ($ImplicitTags && (!$ImplicitTags->{$LastTag} || $IsEndTag)) {
2021 my $Tag = $ImplicitTags->{default};
2022 # Don't insert implicit tag if it's actually the one we actually just parsed
2023 last if !$IsEndTag && $Tag eq $lcTag;
2024 $Self->track_tag(0, $Tag, '');
2025 $$OutR .= "<!-- $Tag implicit open due to $LastTag --><$Tag>";
2026 $LastTag = $Tag;
2027 $ImplicitTags = $ImplicitOpenTags{$LastTag};
2028 }
2029 }
2030 }
2031 }
2032
2033 # Check for correctly nest closing tags
2034 if ($IsEndTag && $IsTagToFix) {
2198 push @$ClosedIntoBlockTags, my $POTD = $OpenedTags->[-1];
2199 $$OutR .= "<!-- close inline tag into block -->" unless $Quiet;
2200 $Self->close_tag(1, $OutR, $HtmlR, $POTD->[0], 1);
2201 }
2202 }
2203 }
2204
2205 sub track_out_of_block_tags {
2206 my ($Self, $OutR, $HtmlR, $lcTag) = @_;
2207
2208 my $ClosedIntoBlockTags = $Self->{closed_into_block_tags};
2209 return if !@$ClosedIntoBlockTags;
2210
2211 # Re-open inline tags into this block
2212
2213 # Peek ahead. If another block tag, don't do this
2214 return if $$HtmlR =~ m{\G(?=\s*</?($TagNameRE))}gc
2215 && $BlockTags{lc "$1"} && !$TableTags{lc "$1"};
2216
2217 my ($SpanCount, $SpanAttrs) = (0, '');
2218 while (my $POTD = pop @$ClosedIntoBlockTags) {
2219
2220 # Don't add more than 3 span tags with same attrs in a row
2221 if ($SpanCount < 3) {
2222 # Add after the current tag is output
2223 $Self->track_and_add_tag_to_output($POTD);
2224 $Self->add_to_output("<!-- reopen inline tag after block -->") unless $Self->{quiet};
2225 }
2226 $SpanCount = $POTD->[0] eq 'span' && $POTD->[1] eq $SpanAttrs ? $SpanCount+1 : 0;
2227 $SpanAttrs = $POTD->[1];
2228 }
2229 }
2230
2231 sub open_tag {
2232 my ($Self, $AddOutput, $OutR, $HtmlR, $lcTag, $TagContentR, $NoBlockCheck) = @_;
2233
2234 $Self->track_implicit_tags($OutR, $HtmlR, $lcTag, 0);
2235 $Self->track_in_to_block_tags($OutR, $HtmlR, $lcTag)
2236 unless $NoBlockCheck;
2237
2238 # Track this tag that was opened (and all attributes, so we can re-open with the same if needed)
2239 if ($Self->{mismatched_tags_to_fix}->{$lcTag}) {
2240 push @{$Self->{opened_tags}}, [ $lcTag, $$TagContentR ];
2241 $Self->{opened_tags_count}->{$lcTag}++;
2242 }
2243 if ($AddOutput) {
2244 $$OutR .= "<${lcTag}${$TagContentR}>";
2245 }
2246
2247 $Self->track_out_of_block_tags($OutR, $HtmlR, $lcTag)
2248 unless $NoBlockCheck;
2249
2250 return DEFANG_NONE;
2251 }
2252
2253 my %RECache;
2254 sub close_tag {
2255 my ($Self, $AddOutput, $OutR, $HtmlR, $lcTag, $NoBlockCheck) = @_;
2256
2257 my ($OpenedTags, $OpenedTagsCount, $Quiet)
2258 = @$Self{qw(opened_tags opened_tags_count quiet)};
2259
2260 # If just closing the last tag, nothing to do
2261 if (@$OpenedTags && $OpenedTags->[-1]->[0] ne $lcTag) {
2262 $Self->track_implicit_tags($OutR, $HtmlR, $lcTag, 1);
2263 }
2264
2265 $Self->track_in_to_block_tags($OutR, $HtmlR, $lcTag)
2266 unless $NoBlockCheck;
2267
2268 # Check for correctly nested closing tags
2269 my $IsTagToFix = $Self->{mismatched_tags_to_fix}->{$lcTag};
2270 if ($IsTagToFix) {
20352271 my ($Found, $ClosingTags) = (0, '');
20362272
20372273 # Tag not even open, just defang it
20522288 }
20532289
20542290 # Close this mismatched tag
2055 $Self->track_tag(1, $PreviousOpenedTag);
2056 $$OutR .= "<!-- close mismatched tag --></$PreviousOpenedTag>";
2057 }
2058
2059 # Should have popped stack correctly if found
2060 if ($Found) {
2061 pop @$OpenedTags;
2062 $OpenedTagsCount->{$lcTag}--;
2291 $$OutR .= "<!-- close mismatched tag -->" unless $Quiet;
2292 $Self->close_tag(1, $OutR, $HtmlR, $PreviousOpenedTag);
2293 }
20632294
20642295 # Otherwise hit tag that stops breaking out, defang it
2065 } else {
2296 if (!$Found) {
20662297 return DEFANG_ALWAYS;
20672298 }
2068
2069 }
2070
2071
2072 # Track this tag that was opened (and all attributes, so we can re-open with the same if needed)
2073 if (!$IsEndTag && $IsTagToFix) {
2074 push @$OpenedTags, [ $lcTag, $$TagContentR ];
2075 $OpenedTagsCount->{$lcTag}++;
2076 }
2077
2078 # Re-open inline tags into this block
2079 if ($BlockTags{$lcTag}) {
2080 # Peek ahead. If another block tag, don't do this
2081 if ($$HtmlR !~ m{\G(?=\s*</?($TagNameRE))}gc || !$BlockTags{lc "$1"}) {
2082 my ($SpanCount, $SpanAttrs) = (0, '');
2083 while (my $POTD = pop @$OpenedNestedTags) {
2084
2085 # Don't add more than 3 span tags with same attrs in a row
2086 if ($SpanCount < 3) {
2087 # Add after the current tag is output
2088 $Self->track_tag(0, @$POTD);
2089 $Self->add_to_output("<!-- reopen inline tag after block --><$POTD->[0]$POTD->[1]>");
2090 }
2091 $SpanCount = $POTD->[0] eq 'span' && $POTD->[1] eq $SpanAttrs ? $SpanCount+1 : 0;
2092 $SpanAttrs = $POTD->[1];
2299 }
2300
2301 my $Result = DEFANG_NONE;
2302
2303 if ($IsTagToFix && $Self->{empty_tags_to_collapse}->{$lcTag}) {
2304
2305 # Check if previous output is open tag, or just comments
2306 my $LastTagPos = rindex($$OutR, '<');
2307 while ($LastTagPos >= 0) {
2308 pos($$OutR) = $LastTagPos;
2309 my $RE = (defined $RECache{$lcTag} ? $RECache{$lcTag} : qr/\G<${lcTag}\b[^<>]*>\s*(?:<|$)/);
2310 if ($$OutR =~ /$RE/gc) {
2311 substr($$OutR, $LastTagPos) = '';
2312 ($AddOutput, $Result) = (0, DEFANG_ALWAYS);
2313 last;
2314 } elsif ($$OutR =~ /\G<!--(?!KEEP)[^<>]*-->\s*(?:<|$)/gc && $LastTagPos > 0) {
2315 $LastTagPos = rindex($$OutR, '<', $LastTagPos-1);
2316 } else {
2317 last;
20932318 }
20942319 }
20952320 }
20962321
2097 return DEFANG_NONE;
2098 }
2099
2100 sub track_tag {
2101 my ($Self, $IsEndTag, $Tag, $Attr) = @_;
2102 my $lcTag = lc $Tag;
2103
2104 if ($IsEndTag) {
2322 if ($IsTagToFix) {
21052323 if ($lcTag eq $Self->{opened_tags}->[-1]->[0]) {
21062324 pop @{$Self->{opened_tags}};
21072325 $Self->{opened_tags_count}->{$lcTag}--;
21082326 } else {
21092327 warn "Unexpected tag stack. Expected $lcTag, found " . $Self->{opened_tags}->[-1]->[0];
21102328 }
2111 } else {
2112 push @{$Self->{opened_tags}}, [ $lcTag, $Attr ];
2113 $Self->{opened_tags_count}->{$lcTag}++;
2114 }
2329 }
2330 if ($AddOutput) {
2331 $$OutR .= "</$lcTag>";
2332 }
2333
2334 $Self->track_out_of_block_tags($OutR, $HtmlR, $lcTag)
2335 unless $NoBlockCheck;
2336
2337 return $Result;
21152338 }
21162339
2117 sub close_tags {
2118 my ($Self, $OutR) = @_;
2340 sub close_all_tags {
2341 my ($Self, $OutR, $HtmlR) = @_;
21192342
21202343 my $RemainingClosingTags = '';
21212344
2122 my ($OpenedTags, $OpenedTagsCount) = @$Self{qw(opened_tags opened_tags_count)};
2123 while (my $PreviousOpenedTag = pop @$OpenedTags) {
2124 $RemainingClosingTags .= "<!-- close unclosed tag --></$PreviousOpenedTag->[0]>";
2125 $OpenedTagsCount->{$PreviousOpenedTag}--;
2126 }
2127 $$OutR .= $RemainingClosingTags;
2345 my ($OpenedTags, $OpenedTagsCount, $Quiet)
2346 = @$Self{qw(opened_tags opened_tags_count quiet)};
2347
2348 for (reverse @$OpenedTags) {
2349 $$OutR .= "<!-- close unclosed tag -->" unless $Quiet;
2350 $Self->close_tag(1, $OutR, $HtmlR, $_->[0]);
2351 }
21282352
21292353 # Also clear implicit tags
21302354 $Self->{opened_nested_tags} = [];
21582382 =back
21592383
21602384 =cut
2161
21622385 sub cleanup_attribute {
2163 my ($Self, $Attr, $AttrKey, $AttrVal) = @_;
2386 my ($Self, $AttrVal) = @_;
21642387
21652388 return (undef, '') unless defined($AttrVal);
21662389
21802403 # These get requoted when we output the attribute
21812404 $AttrVal =~ s/&(quot|apos|amp|lt|gt);?/$EntityToChar{lc($1)} || warn "no entity for: $1"/egi;
21822405
2183 my $AttrValStripped = $AttrVal;
2406 # Have to upgrade string to unicode string if entity expansion
2407 # resulted in non-ascii char
2408 utf8::upgrade($AttrVal) if $UnicodeEntity;
2409
2410 warn "cleanup_attribute AttrVal=$AttrVal" if $Self->{Debug};
2411 return $AttrVal;
2412 }
2413
2414 sub strip_attribute {
2415 my ($Self, $AttrVal) = @_;
21842416
21852417 # In JS, \u000a is unicode char (note &#x5c;&#x75;&#x30;&#x30;&#x37;&#x32; -> \u0072 -> r, so do HTML entities first)
21862418 # This can't be undone, so only do on stripped value
2187 $AttrValStripped =~ s/\\u(0?[\da-f]{1,6});?/defined($1) && hex($1) < 1_114_111 && hex($1) != 65535 && !(hex($1) > 55295 && hex($1) < 57344) ? chr(hex($1)) : ""/egi;
2419 $AttrVal =~ s/\\u(0?[\da-f]{1,6});?/defined($1) && hex($1) < 1_114_111 && hex($1) != 65535 && !(hex($1) > 55295 && hex($1) < 57344) ? chr(hex($1)) : ""/egi;
21882420
21892421 # Also undo URL decoding for "stripped" value
21902422 # (can't do this above, because it's non-reversible, eg "http://...?a=%25" => "http://...?a=?",
21912423 # how would we know which ? to requote when outputting?)
2192 $AttrValStripped =~ s/%([\da-f]{2})/chr(hex($1))/egi;
2193 $AttrValStripped =~ s/[\x00-\x19]*//g;
2194 $AttrValStripped =~ s/^\x20*//g; # http://ha.ckers.org/xss.html#XSS_Spaces_meta_chars
2195
2196 # Have to upgrade string to unicode string if entity expansion
2197 # resulted in non-ascii char
2198 utf8::upgrade($AttrVal) if $UnicodeEntity;
2199
2200 warn "cleanup_attribute AttrValStripped=$AttrVal" if $Self->{Debug};
2201 return ($AttrVal, $AttrValStripped);
2424 $AttrVal =~ s/%([\da-f]{2})/chr(hex($1))/egi;
2425 $AttrVal =~ s/[\x00-\x19]+//g;
2426 $AttrVal =~ s/^[\x20]+//g; # http://ha.ckers.org/xss.html#XSS_Spaces_meta_chars
2427
2428 return $AttrVal;
22022429 }
22032430
22042431 sub get_applicable_charset {
22452472
22462473 =head1 COPYRIGHT AND LICENSE
22472474
2248 Copyright (C) 2003-2010 by Opera Software Australia Pty Ltd
2475 Copyright (C) 2003-2013 by FastMail Pty Ltd
22492476
22502477 This library is free software; you can redistribute it and/or modify
22512478 it under the same terms as Perl itself.
00 #!/usr/bin/perl -w
1 BEGIN { do '/home/mod_perl/hm/ME/FindLibs.pm'; }
2
3 use Test::More tests => 89;
1
2 BEGIN { # CPAN users don't have ME::*, so use eval
3 eval 'use ME::FindLibs'
4 }
5
6 use Test::More tests => 93;
47 use HTML::Defang;
58 use strict;
69
7275 25:<![if gte IE 4]>
7376 26:<SCRIPT>alert('XSS');</SCRIPT>
7477 27:<![endif]>
78 27a:<!--[if gte IE 4]--><foo>
7579 28:<XML ID=I><X><C>
7680 29:<![CDATA[<IMG SRC="javas]]>
7781 30:<![CDATA[cript:alert('XSS');">
121125 like($Res, qr{22:<!--${CommentStartText}\[if gte IE 4\]>}, "IE conditional downlevel-hidden comment start");
122126 like($Res, qr{23:<SCRIPT>alert\('XSS'\);</SCRIPT>}, "IE conditional downlevel-hidden comment body");
123127 like($Res, qr{24:<!\[endif\]${CommentEndText}-->}, "IE conditional downlevel-hidden comment end");
124 like($Res, qr{25:<!--${CommentStartText}\[if gte IE 4\]-->}, "IE conditional downlevel-revealed comment start");
128 like($Res, qr{25:<!--${CommentStartText}\[if gte IE 4\]${CommentEndText}-->}, "IE conditional downlevel-revealed comment start");
125129 like($Res, qr{26:<!--defang_SCRIPT--><!-- alert\('XSS'\); --><!--/defang_SCRIPT-->}, "IE conditional downlevel-revealed comment body");
126 like($Res, qr{27:<!--\[endif\]${CommentEndText}-->}, "IE conditional downlevel-revealed comment end");
130 like($Res, qr{27:<!--${CommentStartText}\[endif\]${CommentEndText}-->}, "IE conditional downlevel-revealed comment end");
131 like($Res, qr{27a:<!--${CommentStartText}\[if gte IE 4\]${CommentEndText}--><!--${DefangString}foo-->}, "IE conditional defang content");
127132
128133 # Some XML tests
129134 # Refer http://www.w3schools.com/XML/xml_cdata.asp for information on CDATA
177182
178183 like($Res, qr{<img width="1" defang_/ = "/">}, "Use '/' as an attribute key");
179184
185 $H = <<'EOF';
186 1:<img width="1" / style="color: red">
187 EOF
188 $Res = $Defang->defang($H);
189
190 like($Res, qr{^1:<img width="1" / style="color: red">$}, "Stray / in tag");
191
180192 $H = <<EOF;
181193 1:<html><!--
182194 EOF
242254 EOF
243255 $Res = $Defang->defang($H);
244256
245 like($Res, qr{<!--defang_script-->
246 <!-- /\*multi line script start\*/
257 like($Res, qr{<!--defang_script--><!--
258 /\*multi line script start\*/
247259 alert\("XSS"\);
248 /\*multi line script end\*/ -->
249 <!--/defang_script-->}, "Multi-line <script> tag with opening HTML comments alone");
260 /\*multi line script end\*/
261 --><!--/defang_script-->}, "Multi-line <script> tag with opening HTML comments alone");
250262
251263 $H = <<'EOF';
252264 <script>
256268 EOF
257269 $Res = $Defang->defang($H);
258270
259 like($Res, qr{<!--defang_script-->
260 <!--
261 alert\("XSS"\); -->
262 <!--/defang_script-->}, "Multi-line <script> tag with closing HTML comments alone");
271 like($Res, qr{<!--defang_script--><!--
272
273 alert\("XSS"\);
274 --><!--/defang_script-->}, "Multi-line <script> tag with closing HTML comments alone");
263275
264276 $Defang = HTML::Defang->new(
265277 fix_mismatched_tags => 1,
311323 $H = <<EOF;
312324 <table>
313325 <tr>
314 <td><i>
326 <td><i>non-blank
315327 <pre>
316328 </tr>
317329 EOF
320332
321333 like($Res, qr{^<table>
322334 <tr>
323 <td><i>
324 <pre>
325 </pre></td></tr>
335 <td><i>non-blank
336 </i><pre><i>
337 </i></pre></td></tr>
326338 </table>$}, "Add multiple missing closing tags when one closing tag and one non-callback tag is present");
327339
328340 $H = <<EOF;
361373 </pre>$}, "Add missing closing tag to end of HTML");
362374
363375 $H = <<EOF;
376 <PRE>
377 <div></a></A>
378 </DIV>
379 </pre>
380 EOF
381 $Res = $Defang->defang($H);
382 $Res =~ s/<!--.*?-->//g;
383
384 like($Res, qr{^<PRE>
385 <div>
386 </DIV>
387 </pre>$}, "Check uppercase/lowercase tags");
388
389 $H = <<EOF;
364390 <table><tr><td>before-font</font>after-font
365391 EOF
366392 $Res = $Defang->defang($H);
384410 $Res = $Defang->defang($H);
385411 $Res =~ s/<!--.*?-->//g;
386412
387 like($Res, qr{^<table><tr><td></td></tr></table>$}, "Check implicit opening tags 2");
413 like($Res, qr{^<table><tr><td>
414 </td></tr></table>$}, "Check implicit opening tags 2");
388415
389416 $H = <<EOF;
390417 <table><tr><td><table></table><div>
455482 EOF
456483 $Res = $Defang->defang($H);
457484
458 like($Res, qr{<!--$CommentStartText\[if gte mso 10\]> <mce:style><! /\* Style Definitions \*/ table.MsoNormalTable {mso-fareast-font-family:"Times New Roman";} $CommentEndText--> <!--$CommentStartText\[endif\]$CommentEndText--></p>
485 like($Res, qr{<!--$CommentStartText\[if gte mso 10\]> <mce:style><! /\* Style Definitions \*/ table.MsoNormalTable \{mso-fareast-font-family:"Times New Roman";\} $CommentEndText--> <!--$CommentStartText\[endif\]$CommentEndText--></p>
459486 <p>&nbsp;<span style="font-size: medium;">I need your help now!</span></p>}, "IE conditional comment without appropriate closing tag");
460487
461488 $H = <<'EOF';
501528
502529 $H = <<EOF;
503530 1:<unknownTag title="something with -- in it">
531 2:<b><noscript><!-- </noscript><img src=xx: onerror=alert(document.domain) --></noscript>
504532 EOF
505533 $Res = $Defang->defang($H);
506534
507535 like($Res, qr{^1:<!--${DefangString}unknownTag title="something with in it"-->}, "Defang unknown tag with --'s in it");
536 like($Res, qr{^2:<b><!--${DefangString}noscript--><!--$CommentStartText </noscript><img src=xx: onerror=alert\(document\.domain\) $CommentEndText--><!--/${DefangString}noscript-->}m, "Defang noscript tag");
537
Binary diff not shown
00 #!/usr/bin/perl -w
11
2 BEGIN { do '/home/mod_perl/hm/ME/FindLibs.pm'; }
3
4 use Test::More tests => 89;
2 BEGIN { # CPAN users don't have ME::*, so use eval
3 eval 'use ME::FindLibs'
4 }
5
6 use Test::More tests => 93;
57 use HTML::Defang;
68 use strict;
79
810 my ($Res, $H);
9 my ($DefangString, $CommentStartText, $CommentEndText) = ('defang_', '', '');
11 my ($DefangString, $CommentStartText, $CommentEndText) = ('defang_', ' ', ' ');
1012
1113 my $Defang = HTML::Defang->new();
1214
4042 $Res = $Defang->defang($H);
4143
4244 like($Res, qr{^<style><!--${CommentStartText}
43 p {font-family: "sans serif"}
45 p \{font-family: "sans serif"\}
4446 $CommentEndText--></style>$}s, "Style tag property with quotes and space");
4547
4648 $H = <<EOF;
5153 $Res = $Defang->defang($H);
5254
5355 like($Res, qr{^<style><!--${CommentStartText}
54 p {text-align:center;color:red}
56 p \{text-align:center;color:red\}
5557 $CommentEndText--></style>
5658 $}s, "Multiple properties");
5759
6971
7072 like($Res, qr{^<style><!--${CommentStartText}
7173 p
72 {
74 \{
7375 text-align: center;
7476 color: black;
7577 font-family: arial
76 }
78 \}
7779 $CommentEndText--></style>
7880 $}s, "Multiple properties in readable format");
7981
8991
9092 like($Res, qr{^<style><!--${CommentStartText}
9193 h1,h2,h3,h4,h5,h6
92 {
94 \{
9395 color: green
94 }
96 \}
9597 $CommentEndText--></style>
9698 $}s, "Multiple selectors");
9799
104106 $Res = $Defang->defang($H);
105107
106108 like($Res, qr{^<style><!--${CommentStartText}
107 p.right {text-align: right}
108 p.center {text-align: center}
109 p.right \{text-align: right\}
110 p.center \{text-align: center\}
109111 $CommentEndText--></style>
110112 $}s, "Selector with a period");
111113
117119 $Res = $Defang->defang($H);
118120
119121 like($Res, qr{^<style><!--${CommentStartText}
120 .center {text-align: center}
122 .center \{text-align: center\}
121123 $CommentEndText--></style>
122124 $}, "Selector starting in a period");
123125
129131 $Res = $Defang->defang($H);
130132
131133 like($Res, qr{^<style><!--${CommentStartText}
132 input\[type="text"\] {background-color: blue}
134 input\[type="text"\] \{background-color: blue\}
133135 $CommentEndText--></style>
134136 $}s, "Selector with square brackets");
135137
141143 $Res = $Defang->defang($H);
142144
143145 like($Res, qr{^<style><!--${CommentStartText}
144 #green {color: green}
146 #green \{color: green\}
145147 $CommentEndText--></style>
146148 $}s, "Selector starting with a hash");
147149
158160
159161 like($Res, qr{^<style><!--${CommentStartText}
160162 p#para1
161 {
163 \{
162164 text-align: center;
163165 color: red
164 }
166 \}
165167 $CommentEndText--></style>
166168 $}s, "Selector with a hash");
167169
185187 like($Res, qr{^<style><!--${CommentStartText}
186188
187189 p
188 {
190 \{
189191 text-align: center;
190192
191193 color: black;
192194 font-family: arial
193 }
195 \}
194196 $CommentEndText--></style>
195197 $}s, "All sorts of comments");
196198
248250 EOF
249251 $Res = $Defang->defang($H);
250252
251 like($Res, qr{^<STYLE><!--${CommentStartText}BODY{/\*-moz-binding:url\("http://ha.ckers.org/xssmoz.xml#xss"\)\*/}$CommentEndText--></STYLE>$}s, "Remote style sheet part 4");
253 like($Res, qr{^<STYLE><!--${CommentStartText}BODY\{/\*-moz-binding:url\("http://ha.ckers.org/xssmoz.xml#xss"\)\*/\}$CommentEndText--></STYLE>$}s, "Remote style sheet part 4");
252254
253255 $H = <<EOF;
254256 <STYLE>li {list-style-image: url("javascript:alert('XSS')");}</STYLE><UL><LI>XSS
255257 EOF
256258 $Res = $Defang->defang($H);
257259
258 like($Res, qr{^<STYLE><!--${CommentStartText}li {/\*list-style-image: url\("javascript:alert\('XSS'\)"\);\*/}$CommentEndText--></STYLE><UL><LI>XSS$}s, "List-style-image");
260 like($Res, qr{^<STYLE><!--${CommentStartText}li \{/\*list-style-image: url\("javascript:alert\('XSS'\)"\);\*/\}$CommentEndText--></STYLE><UL><LI>XSS$}s, "List-style-image");
259261
260262 $H = <<'EOF';
261263 <STYLE>@im\port'\ja\vasc\ript:alert("XSS")';</STYLE>
277279 $Res = $Defang->defang($H);
278280 like($Res, qr{^<STYLE><!--${CommentStartText}
279281
280 a{sss:sss}$CommentEndText--></STYLE>$}s, "Removing multiple css imports");
282 a\{sss:sss\}$CommentEndText--></STYLE>$}s, "Removing multiple css imports");
281283
282284 $H = <<EOF;
283285 <STYLE>\@import'javascript:alert("XSS")';
292294 $Res = $Defang->defang($H);
293295 like($Res, qr{^<STYLE><!--${CommentStartText}
294296
295 a{sss:11111111}$CommentEndText--></STYLE>
297 a\{sss:11111111\}$CommentEndText--></STYLE>
296298 <!--defang_someunknowntag-->
297299 <br>
298300 <STYLE><!--${CommentStartText}
299301
300 a{sss:22222222}$CommentEndText--></STYLE>$}s, "Removing multiple css imports with multiple styles");
302 a\{sss:22222222\}$CommentEndText--></STYLE>$}s, "Removing multiple css imports with multiple styles");
301303
302304 $H = <<EOF;
303305 <STYLE>
309311 $Res = $Defang->defang($H);
310312 like($Res, qr{^<STYLE>
311313 <!--${CommentStartText}
312 p {property: value}
314 p \{property: value\}
313315 $CommentEndText-->
314316 </STYLE>$}s, "Removing HTML comments");
315317
470472 <style>em{color:red};\@import url(&#34;style.css&#34;);</style>
471473 EOF
472474 $Res = $Defang->defang($H);
473 like($Res, qr{^<style><!--${CommentStartText}em{color:red}$CommentEndText--></style>$}, "Test 26");
475 like($Res, qr{^<style><!--${CommentStartText}em\{color:red\}$CommentEndText--></style>$}, "Test 26");
474476
475477 $H = <<EOF;
476478 <style>\@import url(&#34;style.css&#34;);</style>
596598 like($Res, qr{26:<a style=" s7 \{ ae : af \} s8 \{ ag : ah \} ">}s, "Test style attribute - multiple property pairs with selectors, braces and spaces but without semi-colon");
597599 like($Res, qr{27:<a style="s5\{ai:aj;\}s6\{ak:al;\}">}s, "Test style attribute - multiple property pairs with selectors, braces and semi-colon but without spaces");
598600 like($Res, qr{28:<a style=" s7 \{ am : an \} s8 \{ ao : ap ; \} ">}s, "Test style attribute - multiple property pairs with selectors, braces spaces and semi-colon");
599 like($Res, qr{29:<a style="{color: #900} :link {background: #ff0} :visited {background: #fff} :hover {outline: thin red solid} :active {background: #00f}">}s, "Test style attribute - style rule with and without selectors");
600 like($Res, qr{30:<a style="{color: #090; line-height: 1.2} ::first-letter {color: #900}">}, "Test style attribute - style rule with and without selectors in single line");
601 like($Res, qr{31:<a href="abccomscript" title="a" id="a1" style="{color: #900}
602 :link {background: #ff0}
603 :visited {background: #fff}
604 :hover {outline: thin red solid}
605 :active {background: #00f}">
606 $}, "Test style attribute - style rule with and without selectors over multiple lines");
601 like($Res, qr{29:<a style="\{color: #900\} :link \{background: #ff0\} :visited \{background: #fff\} :hover \{outline: thin red solid\} :active \{background: #00f\}">}s, "Test style attribute - style rule with and without selectors");
602 like($Res, qr{30:<a style="\{color: #090; line-height: 1.2\} ::first-letter \{color: #900\}">}, "Test style attribute - style rule with and without selectors in single line");
603 like($Res, qr{31:<a href="abccomscript" title="a" id="a1" style="\{color: #900\}&#x0a; :link \{background: #ff0\}&#x0a; :visited \{background: #fff\}&#x0a; :hover \{outline: thin red solid\}&#x0a; :active \{background: #00f\}">$}, "Test style attribute - style rule with and without selectors over multiple lines");
607604
608605 $H = <<EOF;
609606 <style>
624621
625622 like($Res, qr{<style><!--${CommentStartText}
626623
627 selector1{ab:cd}
628 selector2{ab:cd;}
629 selector3{ab:cd;ef:gh}
630 selector4{ab:cd;ef:gh;}
631 selector5{ab:cd;x:y;p:q;/\*r:url\(http://a.com\);\*//\*e:url\("http://b.com"\) ;\*/}
632 selector6 { ab : cd }
633 selector7 { ab : cd ; }
634 selector8 { ab : cd ; ef : gh }
635 selector9 { ab : cd ; ef : gh ; }
636 selector10 { ab : cd ; x : y ; /\*r : url\(http://a.com\) \*/}
624 selector1\{ab:cd\}
625 selector2\{ab:cd;\}
626 selector3\{ab:cd;ef:gh\}
627 selector4\{ab:cd;ef:gh;\}
628 selector5\{ab:cd;x:y;p:q;/\*r:url\(http://a.com\);\*//\*e:url\("http://b.com"\) ;\*/\}
629 selector6 \{ ab : cd \}
630 selector7 \{ ab : cd ; \}
631 selector8 \{ ab : cd ; ef : gh \}
632 selector9 \{ ab : cd ; ef : gh ; \}
633 selector10 \{ ab : cd ; x : y ; /\*r : url\(http://a.com\) \*/\}
637634 $CommentEndText--></style>}s, "Test style tag css with and without spaces");
638635
639636 $H = <<EOF;
652649
653650 <!--${CommentStartText}
654651
655 body {color: black}
652 body \{color: black\}
656653
657654 $CommentEndText-->
658655 </style>$}s, "Style tag with HTML comments");
665662 $Res = $Defang->defang($H);
666663
667664 like($Res, qr{^<style><!--${CommentStartText}
668 body {color: black}
665 body \{color: black\}
669666 $CommentEndText--></style>$}s, "Style tag without HTML comments");
670667
668 $H = <<EOF;
669 <style><!--
670 body { background: #fff url("javascript:alert('XSS')"); }
671 --></style>
672 EOF
673 $Res = $Defang->defang($H);
674
675 like($Res, qr{^<style><!--${CommentStartText}
676 body \{ /\*background: #fff url\("javascript:alert\('XSS'\)"\);\*/ \}
677 ${CommentEndText}--></style>$}s, "Background with separate url");
678
679 $H = <<EOF;
680 <style><!-- body { */background*/-image: url("javascript:alert('XSS')")/* } --></style>
681 EOF
682 $Res = $Defang->defang($H);
683
684 like($Res, qr{^<style><!--${CommentStartText} body \{ /\*background-image: url\("javascript:alert\('XSS'\)"\) \*/\} ${CommentEndText}--></style>$}s, "Lone end-comment/start-comment in style");
685
686
687 $H = <<EOF;
688 <style>
689 \@media all and (max-width: 699px) {
690 body {
691 border: 10px;
692 color: black;
693 padding: 20px
694 }
695 }
696 \@media all and (min-width: 700px) {
697 body {
698 padding:1px;
699 border: 2px;
700 color: white
701 }
702 }
703 </style>
704 EOF
705 $Res = $Defang->defang($H);
706
707 like($Res, qr{^<style><!--${CommentStartText}
708 \@media all and \(max-width: 699px\) \{
709 body \{
710 border: 10px;
711 color: black;
712 padding: 20px
713 \}
714 \}
715 \@media all and \(min-width: 700px\) \{
716 body \{
717 padding:1px;
718 border: 2px;
719 color: white
720 \}
721 \}
722 $CommentEndText--></style>$}s, "Media selectors");
723
724 $H = <<EOF;
725 <p style="font-size: 30px;; font-weight: lighter ;; line-height: 38px; ; color: #ffffff; font-family: 'Segoe UI Light', 'Segoe WP Light', 'Segoe UI', Helvetica, Arial;; ; ;; ">
726 EOF
727
728 $Res = $Defang->defang($H);
729
730 like($Res, qr{^<p style="font-size: 30px;; font-weight: lighter ;; line-height: 38px; ; color: #ffffff; font-family: 'Segoe UI Light', 'Segoe WP Light', 'Segoe UI', Helvetica, Arial;">}, "Rule with multiple semi-colons");
731
00 #!/usr/bin/perl -w
11
2 BEGIN { do '/home/mod_perl/hm/ME/FindLibs.pm'; }
2 BEGIN { # CPAN users don't have ME::*, so use eval
3 eval 'use ME::FindLibs'
4 }
35
46 use Test::More tests => 33;
57 use HTML::Defang;
810 # Tests taken from http://imfo.ru/csstest/css_hacks/import.php
911
1012 my ($Res, $H);
11 my ($DefangString, ${CommentStartText}, ${CommentEndText}) = ('defang_', '', '');
13 my ($DefangString, $CommentStartText, $CommentEndText) = ('defang_', ' ', ' ');
1214
1315 my $Defang = HTML::Defang->new();
1416
166168 <style>em{color:red};\@import url(&#34;style.css&#34;);</style>
167169 EOF
168170 $Res = $Defang->defang($H);
169 like($Res, qr{^<style><!--${CommentStartText}em{color:red}${CommentEndText}--></style>$}, "Test 26");
171 like($Res, qr{^<style><!--${CommentStartText}em\{color:red\}${CommentEndText}--></style>$}, "Test 26");
170172
171173 $H = <<EOF;
172174 <style>\@import url(&#34;style.css&#34;);</style>
00 #!/usr/bin/perl -w
11
2 BEGIN { do '/home/mod_perl/hm/ME/FindLibs.pm'; }
2 BEGIN { # CPAN users don't have ME::*, so use eval
3 eval 'use ME::FindLibs'
4 }
35
46 use Test::More tests => 94;
57 use HTML::Defang;
68 use strict;
79
810 my ($Res, $H);
9 my ($DefangString, $CommentStartText, $CommentEndText) = ('defang_', '', '');
11 my ($DefangString, $CommentStartText, $CommentEndText) = ('defang_', ' ', ' ');
1012
1113 #################################
1214 # Basic tag callback tests
531533 7:<a style="i3:j3;k3:l3;">
532534 8:<a style=" i4 : j4 ; k4 : l4 ; ">
533535
534 9:<a style="{q:r}">
535 10:<a style=" { s : t } ">
536 11:<a style="{u:v;}">
537 12:<a style=" { w : x ; } ">
538
539 13:<a style="{i5:j5;k5:l5}">
540 14:<a style=" { i6 : j6 ; k6 : l6 } ">
541 15:<a style="{i7:j7;k7:l7;}">
542 16:<a style=" { i8 : j8 ; k8 : l8 ; } ">
543
544 17:<a style="s1{y:z}">
545 18:<a style=" s1 { y2 : z2 } ">
546 19:<a style="s1{y3:z3;}">
547 20:<a style=" s1 { y4 : z4 ; } ">
548
549 21:<a style="s1{y5:z5;y6:z6}">
550 22:<a style=" s2 { y7 : z7 ; y8 : z8 } ">
551 23:<a style="s3{y9:z9;y10:z11;}">
552 24:<a style=" s4 { y12 : z12 ; y13 : z13 ; } ">
553
554 25:<a style="s5{aa:ab}s6{ac:ad}">
555 26:<a style=" s7 { ae : af } s8 { ag : ah } ">
556 27:<a style="s5{ai:aj;}s6{ak:al;}">
557 28:<a style=" s7 { am : an } s8 { ao : ap ; } ">
558
559 29:<a style="{color: #900} :link {background: #ff0} :visited {background: #fff} :hover {outline: thin red solid} :active {background: #00f}">
560 30:<a style="{color: #090; line-height: 1.2} ::first-letter {color: #900}">
561 31:<a href="abccomscript" title="a" id="a1" style="{color: #900}
562 :link {background: #ff0}
563 :visited {background: #fff}
564 :hover {outline: thin red solid}
565 :active {background: #00f}">
536 9:<a style="\{q:r\}">
537 10:<a style=" \{ s : t \} ">
538 11:<a style="\{u:v;\}">
539 12:<a style=" \{ w : x ; \} ">
540
541 13:<a style="\{i5:j5;k5:l5\}">
542 14:<a style=" \{ i6 : j6 ; k6 : l6 \} ">
543 15:<a style="\{i7:j7;k7:l7;\}">
544 16:<a style=" \{ i8 : j8 ; k8 : l8 ; \} ">
545
546 17:<a style="s1\{y:z\}">
547 18:<a style=" s1 \{ y2 : z2 \} ">
548 19:<a style="s1\{y3:z3;\}">
549 20:<a style=" s1 \{ y4 : z4 ; \} ">
550
551 21:<a style="s1\{y5:z5;y6:z6\}">
552 22:<a style=" s2 \{ y7 : z7 ; y8 : z8 \} ">
553 23:<a style="s3\{y9:z9;y10:z11;\}">
554 24:<a style=" s4 \{ y12 : z12 ; y13 : z13 ; \} ">
555
556 25:<a style="s5\{aa:ab\}s6\{ac:ad\}">
557 26:<a style=" s7 \{ ae : af \} s8 \{ ag : ah \} ">
558 27:<a style="s5\{ai:aj;\}s6\{ak:al;\}">
559 28:<a style=" s7 \{ am : an \} s8 \{ ao : ap ; \} ">
560
561 29:<a style="\{color: #900\} :link \{background: #ff0\} :visited \{background: #fff\} :hover \{outline: thin red solid\} :active \{background: #00f\}">
562 30:<a style="\{color: #090; line-height: 1.2\} ::first-letter \{color: #900\}">
563 31:<a href="abccomscript" title="a" id="a1" style="\{color: #900\}&#x0a; :link \{background: #ff0\}&#x0a; :visited \{background: #fff\}&#x0a; :hover \{outline: thin red solid\}&#x0a; :active \{background: #00f\}">
566564 <style><!--${CommentStartText}
567565
568 selector1{ab:cd}
569 selector2{ab:cccd;}
570 selector3{ab:cd;ef:gh}
571 selector4{ab:cd;ef:gh;}
572 selector5{ab:cd;x:y;p:q;/\*r:url\(http://a.com\);\*//\*e:url\("http://b.com"\) ;\*/}
573 selector6 { ab : cd }
574 selector7 { ab : cd ; }
575 selector8 { ab : cd ; ef : gh }
576 selector9 { ab : cd ; ef : gh ; }
577 selector10 { ab : cd ; x : y ; /\*r : url\(http://a.com\) \*/}
566 selector1\{ab:cd\}
567 selector2\{ab:cccd;\}
568 selector3\{ab:cd;ef:gh\}
569 selector4\{ab:cd;ef:gh;\}
570 selector5\{ab:cd;x:y;p:q;/\*r:url\(http://a.com\);\*//\*e:url\("http://b.com"\) ;\*/\}
571 selector6 \{ ab : cd \}
572 selector7 \{ ab : cd ; \}
573 selector8 \{ ab : cd ; ef : gh \}
574 selector9 \{ ab : cd ; ef : gh ; \}
575 selector10 \{ ab : cd ; x : y ; /\*r : url\(http://a.com\) \*/\}
578576 ${CommentEndText}--></style>$}, "CSS callback - force normal");
579577
580578 $Defang = HTML::Defang->new(
658656 7:<a style="i3:j3;k3:l3;">
659657 8:<a style=" i4 : j4 ; k4 : l4 ; ">
660658
661 9:<a style="{q:r}">
662 10:<a style=" { s : t } ">
663 11:<a style="{u:v;}">
664 12:<a style=" { w : x ; } ">
665
666 13:<a style="{i5:j5;k5:l5}">
667 14:<a style=" { i6 : j6 ; k6 : l6 } ">
668 15:<a style="{i7:j7;k7:l7;}">
669 16:<a style=" { i8 : j8 ; k8 : l8 ; } ">
670
671 17:<a style="s1{y:z}">
672 18:<a style=" s1 { y2 : z2 } ">
673 19:<a style="s1{y3:z3;}">
674 20:<a style=" s1 { y4 : z4 ; } ">
675
676 21:<a style="s1{y5:z5;y6:z6}">
677 22:<a style=" s2 { y7 : z7 ; y8 : z8 } ">
678 23:<a style="s3{y9:z9;y10:z11;}">
679 24:<a style=" s4 { y12 : z12 ; y13 : z13 ; } ">
680
681 25:<a style="s5{aa:ab}s6{ac:ad}">
682 26:<a style=" s7 { ae : af } s8 { ag : ah } ">
683 27:<a style="s5{ai:aj;}s6{ak:al;}">
684 28:<a style=" s7 { am : an } s8 { ao : ap ; } ">
685
686 29:<a style="{color: #900} :link {background: #ff0} :visited {background: #fff} :hover {outline: thin red solid} :active {background: #00f}">
687 30:<a style="{color: #090; line-height: 1.2} ::first-letter {color: #900}">
688 31:<a href="abccomscript" title="a" id="a1" style="{color: #900}
689 :link {background: #ff0}
690 :visited {background: #fff}
691 :hover {outline: thin red solid}
692 :active {background: #00f}">
659 9:<a style="\{q:r\}">
660 10:<a style=" \{ s : t \} ">
661 11:<a style="\{u:v;\}">
662 12:<a style=" \{ w : x ; \} ">
663
664 13:<a style="\{i5:j5;k5:l5\}">
665 14:<a style=" \{ i6 : j6 ; k6 : l6 \} ">
666 15:<a style="\{i7:j7;k7:l7;\}">
667 16:<a style=" \{ i8 : j8 ; k8 : l8 ; \} ">
668
669 17:<a style="s1\{y:z\}">
670 18:<a style=" s1 \{ y2 : z2 \} ">
671 19:<a style="s1\{y3:z3;\}">
672 20:<a style=" s1 \{ y4 : z4 ; \} ">
673
674 21:<a style="s1\{y5:z5;y6:z6\}">
675 22:<a style=" s2 \{ y7 : z7 ; y8 : z8 \} ">
676 23:<a style="s3\{y9:z9;y10:z11;\}">
677 24:<a style=" s4 \{ y12 : z12 ; y13 : z13 ; \} ">
678
679 25:<a style="s5\{aa:ab\}s6\{ac:ad\}">
680 26:<a style=" s7 \{ ae : af \} s8 \{ ag : ah \} ">
681 27:<a style="s5\{ai:aj;\}s6\{ak:al;\}">
682 28:<a style=" s7 \{ am : an \} s8 \{ ao : ap ; \} ">
683
684 29:<a style="\{color: #900\} :link \{background: #ff0\} :visited \{background: #fff\} :hover \{outline: thin red solid\} :active \{background: #00f\}">
685 30:<a style="\{color: #090; line-height: 1.2\} ::first-letter \{color: #900\}">
686 31:<a href="abccomscript" title="a" id="a1" style="\{color: #900\}&#x0a; :link \{background: #ff0\}&#x0a; :visited \{background: #fff\}&#x0a; :hover \{outline: thin red solid\}&#x0a; :active \{background: #00f\}">
693687 <style><!--${CommentStartText}
694688
695 selector1{ab:cd}
696 selector2{ab:cccd;}
697 selector3{ab:cd;ef:gh}
698 selector4{ab:cd;ef:gh;}
699 selector5{ab:cd;x:y;p:q;r:url\(http://a.com\);e:url\("http://b.com"\) ;}
700 selector6 { ab : cd }
701 selector7 { ab : cd ; }
702 selector8 { ab : cd ; ef : gh }
703 selector9 { ab : cd ; ef : gh ; }
704 selector10 { ab : cd ; x : y ; r : url\(http://a.com\) }
689 selector1\{ab:cd\}
690 selector2\{ab:cccd;\}
691 selector3\{ab:cd;ef:gh\}
692 selector4\{ab:cd;ef:gh;\}
693 selector5\{ab:cd;x:y;p:q;r:url\(http://a.com\);e:url\("http://b.com"\) ;\}
694 selector6 \{ ab : cd \}
695 selector7 \{ ab : cd ; \}
696 selector8 \{ ab : cd ; ef : gh \}
697 selector9 \{ ab : cd ; ef : gh ; \}
698 selector10 \{ ab : cd ; x : y ; r : url\(http://a.com\) \}
705699 ${CommentEndText}--></style>$}, "CSS callback - force skip");
706700
707701 $Defang = HTML::Defang->new(
785779 7:<a style="/\*i3:j3;\*//\*k3:l3;\*/">
786780 8:<a style=" /\*i4 : j4 ;\*/ /\*k4 : l4 ;\*/ ">
787781
788 9:<a style="{/\*q:r\*/}">
789 10:<a style=" { /\*s : t \*/} ">
790 11:<a style="{/\*u:v;\*/}">
791 12:<a style=" { /\*w : x ;\*/ } ">
792
793 13:<a style="{/\*i5:j5;\*//\*k5:l5\*/}">
794 14:<a style=" { /\*i6 : j6 ;\*/ /\*k6 : l6 \*/} ">
795 15:<a style="{/\*i7:j7;\*//\*k7:l7;\*/}">
796 16:<a style=" { /\*i8 : j8 ;\*/ /\*k8 : l8 ;\*/ } ">
797
798 17:<a style="s1{/\*y:z\*/}">
799 18:<a style=" s1 { /\*y2 : z2 \*/} ">
800 19:<a style="s1{/\*y3:z3;\*/}">
801 20:<a style=" s1 { /\*y4 : z4 ;\*/ } ">
802
803 21:<a style="s1{/\*y5:z5;\*//\*y6:z6\*/}">
804 22:<a style=" s2 { /\*y7 : z7 ;\*/ /\*y8 : z8 \*/} ">
805 23:<a style="s3{/\*y9:z9;\*//\*y10:z11;\*/}">
806 24:<a style=" s4 { /\*y12 : z12 ;\*/ /\*y13 : z13 ;\*/ } ">
807
808 25:<a style="s5{/\*aa:ab\*/}s6{/\*ac:ad\*/}">
809 26:<a style=" s7 { /\*ae : af \*/} s8 { /\*ag : ah \*/} ">
810 27:<a style="s5{/\*ai:aj;\*/}s6{/\*ak:al;\*/}">
811 28:<a style=" s7 { /\*am : an \*/} s8 { /\*ao : ap ;\*/ } ">
812
813 29:<a style="{/\*color: #900\*/} :link {/\*background: #ff0\*/} :visited {/\*background: #fff\*/} :hover {/\*outline: thin red solid\*/} :active {/\*background: #00f\*/}">
814 30:<a style="{/\*color: #090;\*/ /\*line-height: 1.2\*/} ::first-letter {/\*color: #900\*/}">
815 31:<a href="abccomscript" title="a" id="a1" style="{/\*color: #900\*/}
816 :link {/\*background: #ff0\*/}
817 :visited {/\*background: #fff\*/}
818 :hover {/\*outline: thin red solid\*/}
819 :active {/\*background: #00f\*/}">
782 9:<a style="\{/\*q:r\*/\}">
783 10:<a style=" \{ /\*s : t \*/\} ">
784 11:<a style="\{/\*u:v;\*/\}">
785 12:<a style=" \{ /\*w : x ;\*/ \} ">
786
787 13:<a style="\{/\*i5:j5;\*//\*k5:l5\*/\}">
788 14:<a style=" \{ /\*i6 : j6 ;\*/ /\*k6 : l6 \*/\} ">
789 15:<a style="\{/\*i7:j7;\*//\*k7:l7;\*/\}">
790 16:<a style=" \{ /\*i8 : j8 ;\*/ /\*k8 : l8 ;\*/ \} ">
791
792 17:<a style="s1\{/\*y:z\*/\}">
793 18:<a style=" s1 \{ /\*y2 : z2 \*/\} ">
794 19:<a style="s1\{/\*y3:z3;\*/\}">
795 20:<a style=" s1 \{ /\*y4 : z4 ;\*/ \} ">
796
797 21:<a style="s1\{/\*y5:z5;\*//\*y6:z6\*/\}">
798 22:<a style=" s2 \{ /\*y7 : z7 ;\*/ /\*y8 : z8 \*/\} ">
799 23:<a style="s3\{/\*y9:z9;\*//\*y10:z11;\*/\}">
800 24:<a style=" s4 \{ /\*y12 : z12 ;\*/ /\*y13 : z13 ;\*/ \} ">
801
802 25:<a style="s5\{/\*aa:ab\*/\}s6\{/\*ac:ad\*/\}">
803 26:<a style=" s7 \{ /\*ae : af \*/\} s8 \{ /\*ag : ah \*/\} ">
804 27:<a style="s5\{/\*ai:aj;\*/\}s6\{/\*ak:al;\*/\}">
805 28:<a style=" s7 \{ /\*am : an \*/\} s8 \{ /\*ao : ap ;\*/ \} ">
806
807 29:<a style="\{/\*color: #900\*/\} :link \{/\*background: #ff0\*/\} :visited \{/\*background: #fff\*/\} :hover \{/\*outline: thin red solid\*/\} :active \{/\*background: #00f\*/\}">
808 30:<a style="\{/\*color: #090;\*/ /\*line-height: 1.2\*/\} ::first-letter \{/\*color: #900\*/\}">
809 31:<a href="abccomscript" title="a" id="a1" style="\{/\*color: #900\*/\}&#x0a; :link \{/\*background: #ff0\*/\}&#x0a; :visited \{/\*background: #fff\*/\}&#x0a; :hover \{/\*outline: thin red solid\*/\}&#x0a; :active \{/\*background: #00f\*/\}">
820810 <style><!--${CommentStartText}
821811
822 selector1{/\*ab:cd\*/}
823 selector2{/\*ab:cccd;\*/}
824 selector3{/\*ab:cd;\*//\*ef:gh\*/}
825 selector4{/\*ab:cd;\*//\*ef:gh;\*/}
826 selector5{/\*ab:cd;\*//\*x:y;\*//\*p:q;\*//\*r:url\(http://a.com\);\*//\*e:url\("http://b.com"\) ;\*/}
827 selector6 { /\*ab : cd \*/}
828 selector7 { /\*ab : cd ;\*/ }
829 selector8 { /\*ab : cd ;\*/ /\*ef : gh \*/}
830 selector9 { /\*ab : cd ;\*/ /\*ef : gh ;\*/ }
831 selector10 { /\*ab : cd ;\*/ /\*x : y ;\*/ /\*r : url\(http://a.com\) \*/}
812 selector1\{/\*ab:cd\*/\}
813 selector2\{/\*ab:cccd;\*/\}
814 selector3\{/\*ab:cd;\*//\*ef:gh\*/\}
815 selector4\{/\*ab:cd;\*//\*ef:gh;\*/\}
816 selector5\{/\*ab:cd;\*//\*x:y;\*//\*p:q;\*//\*r:url\(http://a.com\);\*//\*e:url\("http://b.com"\) ;\*/\}
817 selector6 \{ /\*ab : cd \*/\}
818 selector7 \{ /\*ab : cd ;\*/ \}
819 selector8 \{ /\*ab : cd ;\*/ /\*ef : gh \*/\}
820 selector9 \{ /\*ab : cd ;\*/ /\*ef : gh ;\*/ \}
821 selector10 \{ /\*ab : cd ;\*/ /\*x : y ;\*/ /\*r : url\(http://a.com\) \*/\}
832822 ${CommentEndText}--></style>}, "CSS callback - force defang");
833823
834824 $Defang = HTML::Defang->new(
836826 my $StyleRules = $_[3];
837827 foreach my $StyleRule (@$StyleRules) {
838828 foreach my $KeyValueRules (@$StyleRule) {
829 unshift @$KeyValueRules, ["apricot", "nectar", 0];
839830 push @$KeyValueRules, ["orange", "juice", 0];
840831 }
841832 }
900891 </style>
901892 EOF
902893 $Res = $Defang->defang($H);
903 like($Res, qr{^1:<a style="a:b;orange:juice">
904 2:<a style=" c : d ;orange:juice">
905 3:<a style="e:f;orange:juice">
906 4:<a style=" g : h ; orange:juice">
907
908 5:<a style="i:j;k:l;orange:juice">
909 6:<a style=" i2 : j2 ; k2 : l2 ;orange:juice">
910 7:<a style="i3:j3;k3:l3;orange:juice">
911 8:<a style=" i4 : j4 ; k4 : l4 ; orange:juice">
912
913 9:<a style="{q:r;orange:juice}">
914 10:<a style=" { s : t ;orange:juice} ">
915 11:<a style="{u:v;orange:juice}">
916 12:<a style=" { w : x ; orange:juice} ">
917
918 13:<a style="{i5:j5;k5:l5;orange:juice}">
919 14:<a style=" { i6 : j6 ; k6 : l6 ;orange:juice} ">
920 15:<a style="{i7:j7;k7:l7;orange:juice}">
921 16:<a style=" { i8 : j8 ; k8 : l8 ; orange:juice} ">
922
923 17:<a style="s1{y:z;orange:juice}">
924 18:<a style=" s1 { y2 : z2 ;orange:juice} ">
925 19:<a style="s1{y3:z3;orange:juice}">
926 20:<a style=" s1 { y4 : z4 ; orange:juice} ">
927
928 21:<a style="s1{y5:z5;y6:z6;orange:juice}">
929 22:<a style=" s2 { y7 : z7 ; y8 : z8 ;orange:juice} ">
930 23:<a style="s3{y9:z9;y10:z11;orange:juice}">
931 24:<a style=" s4 { y12 : z12 ; y13 : z13 ; orange:juice} ">
932
933 25:<a style="s5{aa:ab;orange:juice}s6{ac:ad;orange:juice}">
934 26:<a style=" s7 { ae : af ;orange:juice} s8 { ag : ah ;orange:juice} ">
935 27:<a style="s5{ai:aj;orange:juice}s6{ak:al;orange:juice}">
936 28:<a style=" s7 { am : an ;orange:juice} s8 { ao : ap ; orange:juice} ">
937
938 29:<a style="{color: #900;orange:juice} :link {background: #ff0;orange:juice} :visited {background: #fff;orange:juice} :hover {outline: thin red solid;orange:juice} :active {background: #00f;orange:juice}">
939 30:<a style="{color: #090; line-height: 1.2;orange:juice} ::first-letter {color: #900;orange:juice}">
940 31:<a href="abccomscript" title="a" id="a1" style="{color: #900;orange:juice}
941 :link {background: #ff0;orange:juice}
942 :visited {background: #fff;orange:juice}
943 :hover {outline: thin red solid;orange:juice}
944 :active {background: #00f;orange:juice}">
894 like($Res, qr{^1:<a style="apricot:nectar;a:b;orange:juice">
895 2:<a style=" apricot:nectar;c : d ;orange:juice">
896 3:<a style="apricot:nectar;e:f;orange:juice">
897 4:<a style=" apricot:nectar;g : h ; orange:juice">
898
899 5:<a style="apricot:nectar;i:j;k:l;orange:juice">
900 6:<a style=" apricot:nectar;i2 : j2 ; k2 : l2 ;orange:juice">
901 7:<a style="apricot:nectar;i3:j3;k3:l3;orange:juice">
902 8:<a style=" apricot:nectar;i4 : j4 ; k4 : l4 ; orange:juice">
903
904 9:<a style="\{apricot:nectar;q:r;orange:juice\}">
905 10:<a style=" \{apricot:nectar; s : t ;orange:juice\} ">
906 11:<a style="\{apricot:nectar;u:v;orange:juice\}">
907 12:<a style=" \{apricot:nectar; w : x ; orange:juice\} ">
908
909 13:<a style="\{apricot:nectar;i5:j5;k5:l5;orange:juice\}">
910 14:<a style=" \{apricot:nectar; i6 : j6 ; k6 : l6 ;orange:juice\} ">
911 15:<a style="\{apricot:nectar;i7:j7;k7:l7;orange:juice\}">
912 16:<a style=" \{apricot:nectar; i8 : j8 ; k8 : l8 ; orange:juice\} ">
913
914 17:<a style="s1\{apricot:nectar;y:z;orange:juice\}">
915 18:<a style=" s1 \{apricot:nectar; y2 : z2 ;orange:juice\} ">
916 19:<a style="s1\{apricot:nectar;y3:z3;orange:juice\}">
917 20:<a style=" s1 \{apricot:nectar; y4 : z4 ; orange:juice\} ">
918
919 21:<a style="s1\{apricot:nectar;y5:z5;y6:z6;orange:juice\}">
920 22:<a style=" s2 \{apricot:nectar; y7 : z7 ; y8 : z8 ;orange:juice\} ">
921 23:<a style="s3\{apricot:nectar;y9:z9;y10:z11;orange:juice\}">
922 24:<a style=" s4 \{apricot:nectar; y12 : z12 ; y13 : z13 ; orange:juice\} ">
923
924 25:<a style="s5\{apricot:nectar;aa:ab;orange:juice\}s6\{apricot:nectar;ac:ad;orange:juice\}">
925 26:<a style=" s7 \{apricot:nectar; ae : af ;orange:juice\} s8 \{apricot:nectar; ag : ah ;orange:juice\} ">
926 27:<a style="s5\{apricot:nectar;ai:aj;orange:juice\}s6\{apricot:nectar;ak:al;orange:juice\}">
927 28:<a style=" s7 \{apricot:nectar; am : an ;orange:juice\} s8 \{apricot:nectar; ao : ap ; orange:juice\} ">
928
929 29:<a style="\{apricot:nectar;color: #900;orange:juice\} :link \{apricot:nectar;background: #ff0;orange:juice\} :visited \{apricot:nectar;background: #fff;orange:juice\} :hover \{apricot:nectar;outline: thin red solid;orange:juice\} :active \{apricot:nectar;background: #00f;orange:juice\}">
930 30:<a style="\{apricot:nectar;color: #090; line-height: 1.2;orange:juice\} ::first-letter \{apricot:nectar;color: #900;orange:juice\}">
931 31:<a href="abccomscript" title="a" id="a1" style="\{apricot:nectar;color: #900;orange:juice\}&#x0a; :link \{apricot:nectar;background: #ff0;orange:juice\}&#x0a; :visited \{apricot:nectar;background: #fff;orange:juice\}&#x0a; :hover \{apricot:nectar;outline: thin red solid;orange:juice\}&#x0a; :active \{apricot:nectar;background: #00f;orange:juice\}">
945932 <style><!--${CommentStartText}
946933
947 selector1{ab:cd;orange:juice}
948 selector2{ab:cccd;orange:juice}
949 selector3{ab:cd;ef:gh;orange:juice}
950 selector4{ab:cd;ef:gh;orange:juice}
951 selector5{ab:cd;x:y;p:q;/\*r:url\(http://a.com\);\*//\*e:url\("http://b.com"\) ;\*/orange:juice}
952 selector6 { ab : cd ;orange:juice}
953 selector7 { ab : cd ; orange:juice}
954 selector8 { ab : cd ; ef : gh ;orange:juice}
955 selector9 { ab : cd ; ef : gh ; orange:juice}
956 selector10 { ab : cd ; x : y ; /\*r : url\(http://a.com\) ;\*/orange:juice}
934 selector1\{apricot:nectar;ab:cd;orange:juice\}
935 selector2\{apricot:nectar;ab:cccd;orange:juice\}
936 selector3\{apricot:nectar;ab:cd;ef:gh;orange:juice\}
937 selector4\{apricot:nectar;ab:cd;ef:gh;orange:juice\}
938 selector5\{apricot:nectar;ab:cd;x:y;p:q;/\*r:url\(http://a.com\);\*//\*e:url\("http://b.com"\) ;\*/orange:juice\}
939 selector6 \{apricot:nectar; ab : cd ;orange:juice\}
940 selector7 \{apricot:nectar; ab : cd ; orange:juice\}
941 selector8 \{apricot:nectar; ab : cd ; ef : gh ;orange:juice\}
942 selector9 \{apricot:nectar; ab : cd ; ef : gh ; orange:juice\}
943 selector10 \{apricot:nectar; ab : cd ; x : y ; /\*r : url\(http://a.com\) ;\*/orange:juice\}
957944 ${CommentEndText}--></style>$}, "CSS callback - insert attribute");
958945
959946
10361023 7:<a style="i3:j3;">
10371024 8:<a style=" i4 : j4 ; ">
10381025
1039 9:<a style="{}">
1040 10:<a style=" {} ">
1041 11:<a style="{}">
1042 12:<a style=" {} ">
1043
1044 13:<a style="{i5:j5;}">
1045 14:<a style=" { i6 : j6 ; } ">
1046 15:<a style="{i7:j7;}">
1047 16:<a style=" { i8 : j8 ; } ">
1048
1049 17:<a style="s1{}">
1050 18:<a style=" s1 {} ">
1051 19:<a style="s1{}">
1052 20:<a style=" s1 {} ">
1053
1054 21:<a style="s1{y5:z5;}">
1055 22:<a style=" s2 { y7 : z7 ; } ">
1056 23:<a style="s3{y9:z9;}">
1057 24:<a style=" s4 { y12 : z12 ; } ">
1058
1059 25:<a style="s5{}s6{}">
1060 26:<a style=" s7 {} s8 {} ">
1061 27:<a style="s5{}s6{}">
1062 28:<a style=" s7 {} s8 {} ">
1063
1064 29:<a style="{} :link {} :visited {} :hover {} :active {}">
1065 30:<a style="{color: #090; } ::first-letter {}">
1066 31:<a href="abccomscript" title="a" id="a1" style="{}
1067 :link {}
1068 :visited {}
1069 :hover {}
1070 :active {}">
1026 9:<a style="\{\}">
1027 10:<a style=" \{\} ">
1028 11:<a style="\{\}">
1029 12:<a style=" \{\} ">
1030
1031 13:<a style="\{i5:j5;\}">
1032 14:<a style=" \{ i6 : j6 ; \} ">
1033 15:<a style="\{i7:j7;\}">
1034 16:<a style=" \{ i8 : j8 ; \} ">
1035
1036 17:<a style="s1\{\}">
1037 18:<a style=" s1 \{\} ">
1038 19:<a style="s1\{\}">
1039 20:<a style=" s1 \{\} ">
1040
1041 21:<a style="s1\{y5:z5;\}">
1042 22:<a style=" s2 \{ y7 : z7 ; \} ">
1043 23:<a style="s3\{y9:z9;\}">
1044 24:<a style=" s4 \{ y12 : z12 ; \} ">
1045
1046 25:<a style="s5\{\}s6\{\}">
1047 26:<a style=" s7 \{\} s8 \{\} ">
1048 27:<a style="s5\{\}s6\{\}">
1049 28:<a style=" s7 \{\} s8 \{\} ">
1050
1051 29:<a style="\{\} :link \{\} :visited \{\} :hover \{\} :active \{\}">
1052 30:<a style="\{color: #090; \} ::first-letter \{\}">
1053 31:<a href="abccomscript" title="a" id="a1" style="\{\}&#x0a; :link \{\}&#x0a; :visited \{\}&#x0a; :hover \{\}&#x0a; :active \{\}">
10711054 <style><!--${CommentStartText}
10721055
1073 selector1{}
1074 selector2{}
1075 selector3{ab:cd;}
1076 selector4{ab:cd;}
1077 selector5{ab:cd;x:y;p:q;/\*r:url\(http://a.com\);\*/}
1078 selector6 {}
1079 selector7 {}
1080 selector8 { ab : cd ; }
1081 selector9 { ab : cd ; }
1082 selector10 { ab : cd ; x : y ; }
1056 selector1\{\}
1057 selector2\{\}
1058 selector3\{ab:cd;\}
1059 selector4\{ab:cd;\}
1060 selector5\{ab:cd;x:y;p:q;/\*r:url\(http://a.com\);\*/\}
1061 selector6 \{\}
1062 selector7 \{\}
1063 selector8 \{ ab : cd ; \}
1064 selector9 \{ ab : cd ; \}
1065 selector10 \{ ab : cd ; x : y ; \}
10831066 ${CommentEndText}--></style>$}, "CSS callback - remove attribute from style rule end");
10841067
10851068 $Defang = HTML::Defang->new(
11011084 $Res = $Defang->defang($H);
11021085 like($Res, qr{1:<a STYLE="a:b;orange:juice">}, "Style callback attribute in upper case");
11031086 like($Res, qr{2:<a STYLE="A:b;orange:juice">}, "Style callback attribute and style property in upper case");
1104 like($Res, qr{3:<STYLE><!--${CommentStartText}A {WIDTH: 30;orange:juice}${CommentEndText}--></STYLE>}, "Style callback tag and style property in upper case");
1087 like($Res, qr{3:<STYLE><!--${CommentStartText}A \{WIDTH: 30;orange:juice\}${CommentEndText}--></STYLE>}, "Style callback tag and style property in upper case");
11051088
11061089 #################################
11071090 # Multiple callback test
00 #!/usr/bin/perl -w
11
2 BEGIN { do '/home/mod_perl/hm/ME/FindLibs.pm'; }
2 BEGIN { # CPAN users don't have ME::*, so use eval
3 eval 'use ME::FindLibs'
4 }
35
46 use utf8;
57 use Test::More tests => 19;
911 use strict;
1012
1113 my ($Res, $H);
12 my ($DefangString, $CommentStartText, $CommentEndText) = ('defang_', '', '');
14 my ($DefangString, $CommentStartText, $CommentEndText) = ('defang_', ' ', ' ');
1315
1416 #################################
1517 # Check unicodeness is preserved despite internal non-unicode magic
3941 $Res = $Defang->defang($H);
4042 ok(Encode::is_utf8($Res), "output is unicode");
4143 like($Res, qr{^<!--defang_p-->岡<!--/defang_p-->}, "defang preserves unicode");
42 like($Res, qr{^<!--defang_a defang_href="http://blah\.com/ø" defang_class="û"-->non-english href<!--/defang_a-->}m, "defang preserves unicode2");
44 like($Res, qr{^<!--defang_a href="http://blah\.com/ø" defang_class="û"-->non-english href<!--/defang_a-->}m, "defang preserves unicode2");
4345 $H = <<EOF;
4446 <p>岡</p>
4547 <a href="http://blah.com/ø" class="&#251;">non-english href</a>
4951 $Res = $Defang->defang($H);
5052 ok(Encode::is_utf8($Res), "output2 is unicode");
5153 like($Res, qr{^<!--defang_p-->岡<!--/defang_p-->}, "defang2 preserves unicode");
52 like($Res, qr{^<!--defang_a defang_href="http://blah\.com/ø" defang_class="û"-->non-english href<!--/defang_a-->}m, "defang2 preserves unicode2");
53 like($Res, qr(^<style><!--a { /\*color:redû;\*/ }--></style>)m, "style unicode correct");
54 like($Res, qr{^<!--defang_a href="http://blah\.com/ø" defang_class="û"-->non-english href<!--/defang_a-->}m, "defang2 preserves unicode2");
55 like($Res, qr(^<style><!--${CommentStartText}a \{ /\*color:redû;\*/ \}${CommentEndText}--></style>)m, "style unicode correct");
5456