0 | |
#!/usr/bin/perl -w
|
1 | |
|
2 | 0 |
package HTML::Defang;
|
3 | 1 |
|
4 | 2 |
=head1 NAME
|
|
17 | 15 |
url_callback => \&DefangUrlCallback,
|
18 | 16 |
css_callback => \&DefangCssCallback,
|
19 | 17 |
attribs_to_callback => [ qw(border src) ],
|
20 | |
attribs_callback => \&DefangAttribsCallback
|
|
18 |
attribs_callback => \&DefangAttribsCallback,
|
|
19 |
content_callback => \&ContentCallback,
|
21 | 20 |
);
|
22 | 21 |
|
23 | 22 |
my $SanitizedHtml = $Defang->defang($InputHtml);
|
|
81 | 80 |
return DEFANG_NONE;
|
82 | 81 |
}
|
83 | 82 |
|
|
83 |
# Callback for all content between tags (except <style>, <script>, etc)
|
|
84 |
sub DefangContentCallback {
|
|
85 |
my ($Self, $Defang, $ContentR) = @_;
|
|
86 |
|
|
87 |
$$ContentR =~ s/remove this content//;
|
|
88 |
}
|
|
89 |
|
84 | 90 |
=head1 DESCRIPTION
|
85 | 91 |
|
86 | 92 |
This module accepts an input HTML and/or CSS string and removes any executable code including scripting, embedded objects, applets, etc., and neutralises any XSS attacks. A whitelist based approach is used which means only HTML known to be safe is allowed through.
|
|
120 | 126 |
%EXPORT_TAGS = (all => [qw(@FormTags DEFANG_NONE DEFANG_ALWAYS DEFANG_DEFAULT)]);
|
121 | 127 |
Exporter::export_ok_tags('all');
|
122 | 128 |
|
|
129 |
use 5.008;
|
123 | 130 |
use strict;
|
124 | 131 |
use warnings;
|
125 | 132 |
|
126 | |
our $VERSION=1.04;
|
|
133 |
our $VERSION=1.07;
|
127 | 134 |
|
128 | 135 |
use constant DEFANG_NONE => 0;
|
129 | 136 |
use constant DEFANG_ALWAYS => 1;
|
|
137 | 144 |
our @FormTags = qw(form input textarea select option button fieldset label legend multicol nextid optgroup);
|
138 | 145 |
|
139 | 146 |
# Some regexps for matching HTML tags + key=value attributes
|
140 | |
my $AttrKeyStartLineRE = qr/(?:[^=<>\s\/\\]{1,}|[\/]\s*(?==))/;
|
|
147 |
my $AttrKeyStartLineRE = qr/(?:[^=<>\s\/\\]{1,}|[\/](?!\s*>))/;
|
141 | 148 |
my $AttrKeyRE = qr/(?<=[\s'"\/])$AttrKeyStartLineRE/;
|
142 | |
my $AttrValRE = qr/[^>\s'"`][^>\s]*|'[^']{0,16384}?'|"[^"]{0,16384}?"|`[^`]{0,16384}?`/;
|
|
149 |
my $AttrValRE = qr/[^>\s'"`][^>\s]*|'[^']*?'|"[^"]*?"|`[^`]*?`/;
|
143 | 150 |
my $AttributesRE = qr/(?:(?:$AttrKeyRE\s*)?(?:=\s*$AttrValRE\s*)?)*/;
|
144 | 151 |
my $TagNameRE = qr/[A-Za-z][A-Za-z0-9\#\&\;\:\!_-]*/;
|
145 | 152 |
|
146 | |
my $StyleSelectors = qr/[^{}\s][^{}]*?/;
|
|
153 |
my $StyleSelectors = qr/[^{}\s][^{}]{0,1024}?/;
|
147 | 154 |
my $StyleName = qr/[^:}\s][^:{}]*?/;
|
148 | 155 |
my $StyleValue = qr/[^;}\s][^;}]*|.*$/;
|
149 | 156 |
my $StyleRule = qr/$StyleName\s*:\s*$StyleValue\s*/;
|
150 | |
my $StyleRules = qr/\s*(?:$StyleRule)?(?:;\s*$StyleRule)*(?:;\s*)?/;
|
151 | |
|
152 | |
my $Fonts = qr/"?([A-Za-z0-9\s-]+)"?/;
|
|
157 |
my $StyleRules = qr/\s*(?:$StyleRule)?(?:;\s*$StyleRule)*(?:;\s*)*/;
|
|
158 |
my $StyleMediaSelector = qr/\@media\b[^{]*/;
|
|
159 |
my $RECStyleMediaSelector = qr{\G(\s*)($StyleMediaSelector)(\{)(\s*)}so;
|
|
160 |
my $RECStyleNaked = qr/\G(\s*)()()()($StyleRules)()(\s*)/o;
|
|
161 |
my $RECStyleSelected = qr/\G(\s*)((?:$StyleSelectors)?)(\s*)(\{)($StyleRules)(\})(\s*)/o;
|
|
162 |
|
|
163 |
my $Fonts = qr/["']?([A-Za-z0-9\s-]+)["']?/;
|
153 | 164 |
my $Alignments = qr/(absbottom|absmiddle|all|autocentre|baseline|bottom|center|justify|left|middle|none|right|texttop|top)/;
|
154 | 165 |
|
155 | 166 |
my $Executables = '([^@]\.com|'.
|
156 | 167 |
'.*\.(exe|cmd|bat|pif|scr|sys|sct|lnk|dll'.
|
157 | 168 |
'|vbs?|vbe|hta|shb|shs|hlp|chm|eml|wsf|wsh|js'.
|
158 | 169 |
'|asx|wm.|mdb|mht|msi|msp|cpl|lib|reg))';
|
159 | |
my $SrcBanStd = qr/^([A-Za-z]*script|.*\&{|mocha|about|opera|mailto:|hcp:|\/(dev|proc)|\\|file|smb|cid:${Executables}(@|\?|$))/i;
|
|
170 |
my $SrcBanStd = qr/^([A-Za-z]*script|.*\&\{|mocha|about|opera|mailto:|hcp:|\/(dev|proc)|\\|file|smb|cid:${Executables}(@|\?|$))/i;
|
160 | 171 |
|
161 | 172 |
my %Rules =
|
162 | 173 |
(
|
|
171 | 182 |
"coords" => qr/^(\d+,)+\d+$/i,
|
172 | 183 |
"datetime" => qr/^\d\d\d\d-\d\d-\d\d.{0,5}\d\d:\d\d:\d\d.{0,5}$/,
|
173 | 184 |
"dir" => qr/^(ltr|rtl)$/i,
|
|
185 |
"empty" => qr/^$/i,
|
174 | 186 |
"eudora" => qr/^(autourl)$/i,
|
175 | 187 |
"font-face" => qr/^((${Fonts})[,\s]*)+$/i,
|
176 | 188 |
"form-enctype" => qr/^(application\/x-www-form-urlencoded|multipart\/form-data)$/i,
|
177 | 189 |
"form-method" => qr/^(get|post)$/i,
|
178 | 190 |
"frame" => qr/^(void|above|below|hsides|vsides|lhs|rhs|box|border)$/i,
|
179 | 191 |
# href: Not javascript, vbs or vbscript
|
180 | |
"href" => [ qr/(?i:^([a-z]*script\s*:|.*\&{|mocha|hcp|opera\s*:|about\s*:|smb|\/dev\/|<))|[^\x00-\x7f]/ ],
|
|
192 |
"href" => [ qr/^((?:[a-z]*script|mocha|opera|about|data|tcl)\s*:|.*\&\{|hcp|smb|\/dev\/|<)/i ],
|
181 | 193 |
"usemap-href" => qr/^#[A-Za-z0-9_.-]+$/, # this is not really a href at all!
|
182 | 194 |
"input-size" => qr/^(\d{1,4})$/, # some browsers freak out with very large widgets
|
183 | 195 |
"input-type" => qr/^(button|checkbox|file|hidden|image|password|radio|readonly|reset|submit|text)$/i,
|
|
203 | 215 |
# "style" => qr/expression|eval|script:|mocha:|\&{|\@import|(?<!background-)position:|background-image/i, # XXX there are probably a million more ways to cause trouble with css!
|
204 | 216 |
"style" => qr/^.*$/s,
|
205 | 217 |
#kc In addition to this, we could strip all 'javascript:|expression|' etc. from all attributes(in attribute_cleanup())
|
206 | |
"stylesheet" => [ qr/expression|eval|script:|mocha:|\&{|\@import/i ], # stylesheets are forbidden if Embedded => 1. css positioning can be allowed in an iframe.
|
|
218 |
"stylesheet" => [ qr/expression|eval|script:|mocha:|\&\{|\@import/i ], # stylesheets are forbidden if Embedded => 1. css positioning can be allowed in an iframe.
|
207 | 219 |
# NB see also `process_stylesheet' below
|
208 | 220 |
"style-type" => [ qr/script|mocha/i ],
|
209 | |
"size" => qr/^[\d.]+(px|%)?$/i,
|
|
221 |
"size" => qr/^[\+\-]?[\d.]+(px|%)?$/i,
|
210 | 222 |
"target" => qr/^[A-Za-z0-9_][A-Za-z0-9_.-]*$/,
|
211 | 223 |
"base-href" => qr/^https?:\/\/[\w.\/]+$/,
|
212 | 224 |
"anything" => qr/^.*$/, #[ 0, 0 ],
|
|
243 | 255 |
"scroll" => "boolean",
|
244 | 256 |
"scrolling" => "boolean",
|
245 | 257 |
"topmargin" => "size",
|
|
258 |
"type" => "mime-type",
|
246 | 259 |
"valign" => "align",
|
247 | 260 |
"width" => "size",
|
|
261 |
"/" => "empty",
|
248 | 262 |
);
|
249 | 263 |
|
250 | 264 |
my %ListAttributes =
|
|
292 | 306 |
);
|
293 | 307 |
|
294 | 308 |
my %Tags = (
|
295 | |
script => \&defang_script,
|
296 | |
style => \&defang_style,
|
|
309 |
script => \&defang_script_tag,
|
|
310 |
style => \&defang_style_tag,
|
297 | 311 |
"html" => 100,
|
298 | 312 |
#
|
299 | 313 |
# Safe elements commonly found in the <head> block follow.
|
|
360 | 374 |
"shape" => "shape",
|
361 | 375 |
"target" => "target",
|
362 | 376 |
},
|
|
377 |
"article" => 1,
|
363 | 378 |
"applet" => 0,
|
364 | 379 |
"basefont" =>
|
365 | 380 |
{
|
|
416 | 431 |
"size" => "number",
|
417 | 432 |
"ptsize" => "number",
|
418 | 433 |
},
|
|
434 |
"footer" => 1,
|
419 | 435 |
"form" => # FORM
|
420 | 436 |
{
|
421 | 437 |
"method" => "form-method",
|
|
424 | 440 |
"accept" => "anything",
|
425 | 441 |
"accept-charset" => "anything",
|
426 | 442 |
},
|
|
443 |
"header" => 1,
|
427 | 444 |
"hr" =>
|
428 | 445 |
{
|
429 | 446 |
"size" => "number",
|
|
503 | 520 |
"nobr" => 0,
|
504 | 521 |
"noembed" => 1,
|
505 | 522 |
"nolayer" => 1,
|
506 | |
"noscript" => 1,
|
|
523 |
# Pretend our defang result is going into a non-scripting environment,
|
|
524 |
# even though javascript is likely enabled, so just defang all noscript tags
|
|
525 |
"noscript" => 0,
|
507 | 526 |
"noembed" => 1,
|
508 | 527 |
"object" => 0,
|
509 | 528 |
"ol" => \%ListAttributes,
|
|
526 | 545 |
"pre" => 1,
|
527 | 546 |
"rt" => 0,
|
528 | 547 |
"ruby" => 0,
|
|
548 |
"section" => 1,
|
529 | 549 |
"select" => # FORM
|
530 | 550 |
{
|
531 | 551 |
"disabled" => "anything",
|
|
631 | 651 |
);
|
632 | 652 |
|
633 | 653 |
# Some entity conversions for attributes
|
|
654 |
my $CtrlChars = qr/[\x00-\x08\x0b-\x1f]/;
|
634 | 655 |
my %EntityToChar = (quot => '"', apos => "'", amp => '&', 'lt' => '<', 'gt' => '>');
|
635 | |
my %CharToEntity = reverse %EntityToChar;
|
636 | |
my %QuoteRe = ('"' => qr/(["&<>])/, "'" => qr/(['&<>])/, "" => qr/(["&<>])/);
|
637 | |
|
638 | |
# Default list of mismatched tags to track
|
639 | |
my %MismatchedTags = map { $_ => 1 } qw(table tbody thead tr td th font div span pre center blockquote dl ul ol h1 h2 h3 h4 h5 h6 fieldset tt p noscript a);
|
|
656 |
my %CharToEntity = ((reverse %EntityToChar), ' ' => '#x20', '/' => '#x2f', "\x09" => '#x09', "\x0a" => '#x0a');
|
|
657 |
my %QuoteRe = ('"' => qr/(["&<>\x09\x0a])/, "'" => qr/(['&<>\x09\x0a])/, "" => qr/(['"&<> \/\x09\x0a])/);
|
640 | 658 |
|
641 | 659 |
# When fixing mismatched tags, sometimes a close tag
|
642 | 660 |
# shouldn't close all the way out
|
|
673 | 691 |
# Convert to hash of hashes
|
674 | 692 |
$_ = { default => $_->[0], map { $_ => 1 } @$_ } for values %ImplicitOpenTags;
|
675 | 693 |
|
676 | |
my %BlockTags = map { $_ => 1 } qw(h1 h2 h3 h4 h5 h6 p div pre plaintext address blockquote center form table tbody thead tfoot tr td caption colgroup col);
|
677 | |
my %InlineTags = map { $_ => 1 } qw(span abbr acronym q sub sup cite code em kbd samp strong var dfn strike b i u s tt small big nobr a);
|
|
694 |
my %TableTags = map { $_ => 1 } qw(table tbody thead tfoot tr td th caption colgroup col);
|
|
695 |
my %BlockTags = map { $_ => 1 } qw(h1 h2 h3 h4 h5 h6 p div pre plaintext address blockquote center form table tbody thead tfoot tr td th caption colgroup col dl ul ol li fieldset);
|
|
696 |
my %InlineTags = map { $_ => 1 } qw(span abbr acronym q sub sup cite code em kbd samp strong var dfn strike b i u s tt small big nobr a font);
|
678 | 697 |
my %NestInlineTags = map { $_ => 1 } qw(span abbr acronym q sub sup cite code em kbd samp strong var dfn strike b i u s tt small big nobr);
|
679 | 698 |
|
|
699 |
# Default list of mismatched tags to track
|
|
700 |
my %MismatchedTags = (%BlockTags, %InlineTags);
|
|
701 |
|
680 | 702 |
=head1 CONSTRUCTOR
|
681 | 703 |
|
682 | 704 |
=over 4
|
|
716 | 738 |
=item B<css_callback>
|
717 | 739 |
|
718 | 740 |
Subroutine reference to be invoked when CSS data is found either as the contents of a 'style' attribute in an HTML tag, or as the contents of a <style> HTML tag.
|
|
741 |
|
|
742 |
=item B<content_callback>
|
|
743 |
|
|
744 |
Subroutine reference to be invoked when standard content between HTML tags in found.
|
719 | 745 |
|
720 | 746 |
=item B<fix_mismatched_tags>
|
721 | 747 |
|
|
743 | 769 |
return value of a callback. Any tag or attribute modifications made
|
744 | 770 |
directly by a callback are still performed.
|
745 | 771 |
|
|
772 |
=item B<delete_defang_content>
|
|
773 |
|
|
774 |
Normally defanged tags are turned into comments and prefixed by defang_,
|
|
775 |
and defanged styles are surrounded by /* ... */. If this is set to
|
|
776 |
true, then defanged content is deleted instead
|
|
777 |
|
746 | 778 |
=item B<Debug>
|
747 | 779 |
|
748 | 780 |
If set, prints debugging output.
|
749 | |
|
750 | |
=back
|
751 | 781 |
|
752 | 782 |
=back
|
753 | 783 |
|
|
761 | 791 |
|
762 | 792 |
my %Opts = @_;
|
763 | 793 |
|
764 | |
# my $Context = shift;
|
765 | |
|
766 | |
my ($tags_to_callback, $attribs_to_callback) = ($Opts{"tags_to_callback"}, $Opts{"attribs_to_callback"});
|
767 | |
my %tags_to_callback = map { $_ => 1 } @$tags_to_callback if $tags_to_callback;
|
768 | |
my %attribs_to_callback = map { $_ => 1 } @$attribs_to_callback if $attribs_to_callback;
|
|
794 |
my ($tags_to_callback, $attribs_to_callback, $empty_tags_to_collapse, $mismatched_tags_to_fix)
|
|
795 |
= @Opts{qw(tags_to_callback attribs_to_callback empty_tags_to_collapse mismatched_tags_to_fix)};
|
|
796 |
my %tags_to_callback;
|
|
797 |
%tags_to_callback = map { $_ => 1 } @$tags_to_callback if $tags_to_callback;
|
|
798 |
my %attribs_to_callback;
|
|
799 |
%attribs_to_callback = map { $_ => 1 } @$attribs_to_callback if $attribs_to_callback;
|
|
800 |
my %empty_tags_to_collapse;
|
|
801 |
%empty_tags_to_collapse = map { $_ => 1 } @$empty_tags_to_collapse if $empty_tags_to_collapse;
|
769 | 802 |
my %mismatched_tags_to_fix = %MismatchedTags;
|
770 | |
%mismatched_tags_to_fix = map { $_ => 1 } @{$Opts{'mismatched_tags_to_fix'}} if $Opts{'mismatched_tags_to_fix'};
|
|
803 |
%mismatched_tags_to_fix = map { $_ => 1 } @$mismatched_tags_to_fix if $mismatched_tags_to_fix;
|
771 | 804 |
|
772 | 805 |
my $Self = {
|
773 | 806 |
defang_string => 'defang_',
|
|
807 |
defang_re => qr/^defang_/,
|
|
808 |
defang_default => (defined $Opts{defang_default} ? $Opts{defang_default} : DEFANG_DEFAULT),
|
774 | 809 |
allow_double_defang => $Opts{allow_double_defang},
|
775 | 810 |
tags_to_callback => \%tags_to_callback,
|
776 | 811 |
tags_callback => $Opts{tags_callback},
|
|
778 | 813 |
attribs_callback => $Opts{attribs_callback},
|
779 | 814 |
url_callback => $Opts{url_callback},
|
780 | 815 |
css_callback => $Opts{css_callback},
|
|
816 |
content_callback => $Opts{content_callback},
|
781 | 817 |
mismatched_tags_to_fix => \%mismatched_tags_to_fix,
|
782 | 818 |
fix_mismatched_tags => $Opts{fix_mismatched_tags},
|
783 | 819 |
context => $Opts{context},
|
784 | 820 |
opened_tags => [],
|
785 | 821 |
opened_tags_count => {},
|
786 | |
opened_nested_tags => [],
|
|
822 |
closed_into_block_tags => [],
|
|
823 |
empty_tags_to_collapse => \%empty_tags_to_collapse,
|
|
824 |
quiet => $Opts{quiet},
|
|
825 |
delete_defang_content => $Opts{delete_defang_content},
|
787 | 826 |
Debug => $Opts{Debug},
|
788 | 827 |
};
|
789 | 828 |
|
790 | 829 |
bless ($Self, $Class);
|
791 | 830 |
return $Self;
|
792 | 831 |
}
|
|
832 |
|
|
833 |
=item I<HTML::Defang-E<gt>new_bodyonly(%Options)>
|
|
834 |
|
|
835 |
Constructs a new HTML::Defang object that has the following
|
|
836 |
implicit options
|
|
837 |
|
|
838 |
=over 4
|
|
839 |
|
|
840 |
=item B<fix_mismatched_tags = 1>
|
|
841 |
|
|
842 |
=item B<delete_defang_content = 1>
|
|
843 |
|
|
844 |
=item B<tags_to_callback = [ qw(html head link body meta title bgsound) ]>
|
|
845 |
|
|
846 |
=item B<tags_callback = { ... remove all above tags and related content ... }>
|
|
847 |
|
|
848 |
=item B<url_callback = { ... explicity DEFANG_NONE to leave everything alone ... }>
|
|
849 |
|
|
850 |
=back
|
|
851 |
|
|
852 |
Basically this is a easy way to remove all html boiler plate
|
|
853 |
content and return only the html body content.
|
|
854 |
|
|
855 |
=cut
|
|
856 |
|
|
857 |
sub new_bodyonly {
|
|
858 |
return shift->new(
|
|
859 |
fix_mismatched_tags => 1,
|
|
860 |
delete_defang_content => 1,
|
|
861 |
tags_to_callback => [ qw(html head link body meta title bgsound) ],
|
|
862 |
tags_callback => sub {
|
|
863 |
my (undef, $Defang, $Angle, $lcTag, $IsEndTag, $AttributeHash, $AttributesEnd, $HtmlR, $OutR) = @_;
|
|
864 |
$$HtmlR =~ m{\G.*?(?=</title|</head)}gcis if $lcTag eq 'title' && !$IsEndTag;
|
|
865 |
return DEFANG_ALWAYS;
|
|
866 |
},
|
|
867 |
url_callback => sub { return DEFANG_NONE; },
|
|
868 |
@_
|
|
869 |
);
|
|
870 |
}
|
|
871 |
|
|
872 |
=back
|
|
873 |
|
|
874 |
=cut
|
793 | 875 |
|
794 | 876 |
=head1 CALLBACK METHODS
|
795 | 877 |
|
|
1047 | 1129 |
|
1048 | 1130 |
=over 4
|
1049 | 1131 |
|
1050 | |
=item I<defang($InputHtml)>
|
|
1132 |
=cut
|
|
1133 |
|
|
1134 |
=item I<defang($InputHtml, \%Opts)>
|
1051 | 1135 |
|
1052 | 1136 |
Cleans up $InputHtml of any executable code including scripting, embedded objects, applets, etc., and defang any XSS attacks.
|
1053 | 1137 |
|
1054 | |
=over 4
|
|
1138 |
=over 4
|
1055 | 1139 |
|
1056 | 1140 |
=item B<Method parameters>
|
1057 | 1141 |
|
|
1068 | 1152 |
Returns the cleaned HTML. If fix_mismatched_tags is set, any tags that appear in @$mismatched_tags_to_fix that are unbalanced are automatically commented or closed.
|
1069 | 1153 |
|
1070 | 1154 |
=cut
|
1071 | |
|
1072 | 1155 |
sub defang {
|
1073 | 1156 |
my $Self = shift;
|
1074 | 1157 |
|
1075 | 1158 |
my $I = shift;
|
|
1159 |
my $Opts = shift;
|
1076 | 1160 |
|
1077 | 1161 |
my $Debug = $Self->{Debug};
|
1078 | 1162 |
|
1079 | |
my $HeaderCharset = shift;
|
|
1163 |
my $HeaderCharset = $Opts->{header_charset};
|
1080 | 1164 |
warn("defang HeaderCharset=$HeaderCharset") if $Debug;
|
1081 | |
my $FallbackCharset = shift;
|
|
1165 |
my $FallbackCharset = $Opts->{fallback_charset};
|
1082 | 1166 |
warn("defang FallbackCharset=$FallbackCharset") if $Debug;
|
1083 | 1167 |
|
1084 | 1168 |
$Self->{Reentrant}++;
|
|
1169 |
|
|
1170 |
# Output buffer
|
|
1171 |
local $Self->{OutR} = $Opts->{add_to_existing} ? $Self->{OutR} : \(my $O = "");
|
|
1172 |
my $OutR = $Self->{OutR};
|
1085 | 1173 |
|
1086 | 1174 |
# Get encoded characters
|
1087 | 1175 |
# $Self->{Charset} = $Self->get_applicable_charset($_, $HeaderCharset, $FallbackCharset);
|
|
1110 | 1198 |
# Force byte matching everywhere (see above)
|
1111 | 1199 |
use bytes;
|
1112 | 1200 |
|
|
1201 |
Carp::cluck() if !defined $I;
|
|
1202 |
|
1113 | 1203 |
# Strip all NUL chars
|
1114 | 1204 |
$I =~ s/\0//g;
|
1115 | |
|
1116 | |
# Output buffer
|
1117 | |
my $O = '';
|
1118 | 1205 |
|
1119 | 1206 |
# This parser uses standard /\G.../gc matching, so have to be careful
|
1120 | 1207 |
# to not reset pos() on the string
|
|
1129 | 1216 |
# walk to next < (testing in 5.8.8 shows .*? is faster than [^<]* or [^<]*?)
|
1130 | 1217 |
if ($I =~ m{\G(.*?)<}gcso) {
|
1131 | 1218 |
|
|
1219 |
my $Content = $1;
|
|
1220 |
|
|
1221 |
# Call content callback if present
|
|
1222 |
$Self->{content_callback}->($Self->{context}, $Self, \$Content)
|
|
1223 |
if $Self->{content_callback};
|
|
1224 |
|
1132 | 1225 |
# Everything before tag goes into the output
|
1133 | |
$O .= $1;
|
|
1226 |
$$OutR .= $Content;
|
1134 | 1227 |
|
1135 | 1228 |
# All tags default to open/close with </>
|
1136 | 1229 |
my ($OpenAngle, $CloseAngle) = ('<', '>');
|
|
1177 | 1270 |
NoParseAttributes:
|
1178 | 1271 |
my $Defang = DEFANG_ALWAYS;
|
1179 | 1272 |
|
1180 | |
my $TagOps = $Tags{lc $Tag};
|
|
1273 |
my $lcTag = lc $Tag;
|
|
1274 |
my $TagOps = $Tags{$lcTag};
|
1181 | 1275 |
|
1182 | 1276 |
# Process this tag
|
1183 | |
if (ref $TagOps eq "CODE") {
|
|
1277 |
if (!exists $Self->{tags_to_callback}->{$lcTag} && ref $TagOps eq "CODE") {
|
1184 | 1278 |
|
1185 | 1279 |
warn "process_tag Found CODE reference" if $Debug;
|
1186 | |
$Defang = $Self->${TagOps}(\$O, \$I, $TagOps, \$OpenAngle, $IsEndTag, $Tag, $TagTrail, \@Attributes, \$CloseAngle);
|
|
1280 |
$Defang = $Self->${TagOps}($OutR, \$I, $TagOps, \$OpenAngle, $IsEndTag, $lcTag, $TagTrail, \@Attributes, \$CloseAngle);
|
1187 | 1281 |
|
1188 | 1282 |
} else {
|
1189 | 1283 |
|
1190 | 1284 |
warn "process_tag Found regular tag" if $Debug;
|
1191 | |
$Defang = $Self->defang_attributes(\$O, \$I, $TagOps, \$OpenAngle, $IsEndTag, $Tag, $TagTrail, \@Attributes, \$CloseAngle);
|
|
1285 |
$Defang = $Self->defang_attributes($OutR, \$I, $TagOps, \$OpenAngle, $IsEndTag, $lcTag, $TagTrail, \@Attributes, \$CloseAngle);
|
1192 | 1286 |
|
1193 | 1287 |
}
|
1194 | 1288 |
die "Callback reset pos on Tag=$Tag IsEndTag=$IsEndTag" if !defined pos($I);
|
|
1199 | 1293 |
# @Attributes can have unicode values, but we're within "use bytes", so it's flattened ok
|
1200 | 1294 |
my $TagContent = $TagTrail . join("", grep { defined } map { @$_ } @Attributes);
|
1201 | 1295 |
|
1202 | |
$Defang ||= $Self->track_tags(\$O, \$I, $TagOps, \$OpenAngle, $IsEndTag, $Tag, \$TagContent)
|
1203 | |
if $Self->{fix_mismatched_tags} && ($Defang != DEFANG_ALWAYS);
|
|
1296 |
if ($Self->{fix_mismatched_tags} && ($Defang == DEFANG_NONE)) {
|
|
1297 |
if (!$IsEndTag) {
|
|
1298 |
$Defang = $Self->open_tag(0, $OutR, \$I, $lcTag, \$TagContent);
|
|
1299 |
} else {
|
|
1300 |
$Defang = $Self->close_tag(0, $OutR, \$I, $lcTag);
|
|
1301 |
goto SkipOutput if $Defang == DEFANG_ALWAYS;
|
|
1302 |
}
|
|
1303 |
}
|
1204 | 1304 |
|
1205 | 1305 |
# defang unknown tags
|
1206 | 1306 |
if ($Defang != DEFANG_NONE) {
|
1207 | 1307 |
warn "defang Defanging $Tag" if $Debug;
|
1208 | |
$Tag = $Self->{defang_string} . $Tag
|
1209 | |
if $Self->{allow_double_defang}
|
1210 | |
|| (
|
1211 | |
substr( $Tag, 0, length( $Self->{defang_string} ) ) ne
|
1212 | |
$Self->{defang_string} );
|
1213 | |
$TagContent =~ s/--//g;
|
1214 | |
$Tag =~ s/--//g;
|
1215 | |
$OpenAngle =~ s/^</<!--/;
|
1216 | |
$CloseAngle =~ s/>$/-->/;
|
|
1308 |
if ($Self->{delete_defang_content}) {
|
|
1309 |
$OpenAngle = $IsEndTag = $Tag = $TagContent = $CloseAngle = '';
|
|
1310 |
} else {
|
|
1311 |
$Tag = $Self->{defang_string} . $Tag
|
|
1312 |
if $Self->{allow_double_defang} || $Tag !~ $Self->{defang_re};
|
|
1313 |
$TagContent =~ s/--//g;
|
|
1314 |
$Tag =~ s/--//g;
|
|
1315 |
$OpenAngle =~ s/^</<!--/;
|
|
1316 |
$CloseAngle =~ s/>$/-->/;
|
|
1317 |
}
|
1217 | 1318 |
}
|
1218 | 1319 |
|
1219 | 1320 |
# And put it all back together into the output string
|
1220 | |
$O .= $OpenAngle . $IsEndTag . $Tag . $TagContent . $CloseAngle;
|
1221 | |
|
1222 | |
# It's a comment of some sort. We are looking for regular HTML comment, XML CDATA section and
|
1223 | |
# IE conditional comments
|
1224 | |
# Refer http://msdn.microsoft.com/en-us/library/ms537512.aspx for IE conditional comment information
|
1225 | |
} elsif ($I =~ m{\G(!)((?:\[CDATA\[|(?:--)?\[if|--)?)}gcis) {
|
|
1321 |
$$OutR .= $OpenAngle . $IsEndTag . $Tag . $TagContent . $CloseAngle;
|
|
1322 |
SkipOutput:
|
|
1323 |
|
|
1324 |
# It's a comment of some sort. We are looking for regular HTML comment, XML CDATA section
|
|
1325 |
} elsif ($I =~ m{\G(!)((?:\[CDATA\[|--)?)}gcis) {
|
1226 | 1326 |
|
1227 | 1327 |
my ($Comment, $CommentDelim) = ($1, $2);
|
1228 | 1328 |
warn "defang Comment=$Comment CommentDelim=$CommentDelim" if $Debug;
|
1229 | |
|
|
1329 |
|
1230 | 1330 |
# Find the appropriate closing delimiter
|
1231 | 1331 |
my $IsCDATA = $CommentDelim eq "[CDATA[";
|
1232 | 1332 |
my $ClosingCommentDelim = $IsCDATA ? "]]" : $CommentDelim;
|
1233 | |
|
1234 | |
my $EndRestartCommentsText = '';
|
1235 | |
# Handle IE conditionals specially. We can have <![if ...]>, <!--[if ...]> and <!--[if ...]-->
|
1236 | |
# for the third case, we just want to immediately match the -->
|
1237 | |
if ($CommentDelim =~ /((?:--)?)\[if/) {
|
1238 | |
my $ConditionalDelim = $1;
|
1239 | |
$EndRestartCommentsText = '--' if $ConditionalDelim eq '';
|
1240 | |
$ClosingCommentDelim = $CommentDelim;
|
1241 | |
if ($I !~ m{\G[^\]]*\]-->}gcis) {
|
1242 | |
$ClosingCommentDelim = "<![endif]$ConditionalDelim";
|
1243 | |
}
|
|
1333 |
|
|
1334 |
warn "defang ClosingCommentDelim=$ClosingCommentDelim" if $Debug;
|
|
1335 |
|
|
1336 |
my ($CommentStartText, $CommentEndText) = ("--/*SC*/", "/*EC*/--");
|
|
1337 |
|
|
1338 |
# Convert to regular HTML comment
|
|
1339 |
if (!$Self->{delete_defang_content}) {
|
|
1340 |
$$OutR .= $OpenAngle . $Comment . $CommentStartText;
|
1244 | 1341 |
}
|
1245 | 1342 |
|
1246 | |
warn "defang ClosingCommentDelim=$ClosingCommentDelim" if $Debug;
|
1247 | |
|
1248 | |
my ($CommentStartText, $CommentEndText) = ("--/*SC*/", "/*EC*/--");
|
1249 | |
|
1250 | |
# Convert to regular HTML comment
|
1251 | |
$O .= $OpenAngle . $Comment . $CommentStartText;
|
1252 | |
|
1253 | 1343 |
# Find closing comment
|
1254 | |
if ($I =~ m{\G(.*?)(\Q${ClosingCommentDelim}\E!?\s*)(>)}gcis || $I =~ m{\G(.*?)(--)(>)}gcis) {
|
|
1344 |
if ($I =~ m{\G(.*?)(\Q$ClosingCommentDelim\E!?\s*)(>)}gcis || $I =~ m{\G(.*?)(--)(>)}gcis) {
|
1255 | 1345 |
|
1256 | 1346 |
my ( $StartTag, $CommentData, $ClosingTag, $CloseAngle ) =
|
1257 | 1347 |
( $CommentDelim, $1, $2, $3 );
|
1258 | |
|
1259 | |
if ($EndRestartCommentsText && $CommentData =~ s/^(.*?)(>.*)$/$2/s) {
|
1260 | |
$StartTag .= $1;
|
1261 | |
}
|
1262 | 1348 |
|
1263 | 1349 |
# Strip all HTML comment markers
|
1264 | 1350 |
$StartTag =~ s/--//g;
|
1265 | 1351 |
$CommentData =~ s/--//g;
|
1266 | 1352 |
$ClosingTag =~ s/--//g;
|
1267 | 1353 |
|
1268 | |
$StartTag .= $EndRestartCommentsText if $CommentData;
|
1269 | |
$ClosingTag =~ s{^(<!)}{$1$EndRestartCommentsText} if $CommentData;
|
1270 | |
|
1271 | |
# Put it all into the output
|
1272 | |
$O .= $StartTag
|
1273 | |
. ($EndRestartCommentsText ? $Self->defang($CommentData) : $CommentData)
|
1274 | |
. $ClosingTag
|
1275 | |
. $CommentEndText
|
1276 | |
. $CloseAngle;
|
|
1354 |
if (!$Self->{delete_defang_content}) {
|
|
1355 |
# Put it all into the output
|
|
1356 |
$$OutR .= $StartTag
|
|
1357 |
. $CommentData
|
|
1358 |
. $ClosingTag
|
|
1359 |
. $CommentEndText
|
|
1360 |
. $CloseAngle;
|
|
1361 |
}
|
1277 | 1362 |
|
1278 | 1363 |
# No closing comment, so we add that
|
1279 | 1364 |
} else {
|
|
1284 | 1369 |
$Data =~ s/--//g;
|
1285 | 1370 |
|
1286 | 1371 |
# Output
|
1287 | |
$O .= $Data . $CommentEndText . ">";
|
|
1372 |
if (!$Self->{delete_defang_content}) {
|
|
1373 |
$$OutR .= $Data . $CommentEndText . ">";
|
|
1374 |
}
|
1288 | 1375 |
|
1289 | 1376 |
}
|
1290 | 1377 |
|
|
1303 | 1390 |
|
1304 | 1391 |
$Data =~ s{--}{}g;
|
1305 | 1392 |
|
1306 | |
$O .= $OpenAngle . '!--' . $Processing . $Data . '-->';
|
|
1393 |
if (!$Self->{delete_defang_content}) {
|
|
1394 |
$$OutR .= $OpenAngle . '!--' . $Processing . $Data . '-->';
|
|
1395 |
}
|
1307 | 1396 |
|
1308 | 1397 |
}
|
1309 | 1398 |
# Some other thing starting with <, keep looking
|
1310 | 1399 |
|
|
1400 |
if (exists $Self->{TrackedAppendOutput}) {
|
|
1401 |
for (@{delete $Self->{TrackedAppendOutput}}) {
|
|
1402 |
$Self->open_tag(1, $OutR, \$I, $_->[0], \$_->[1], 1);
|
|
1403 |
}
|
|
1404 |
}
|
1311 | 1405 |
if (exists $Self->{AppendOutput}) {
|
1312 | |
$O .= delete $Self->{AppendOutput};
|
|
1406 |
$$OutR .= delete $Self->{AppendOutput};
|
1313 | 1407 |
}
|
1314 | 1408 |
if (exists $Self->{DelayedAppendOutput}) {
|
1315 | |
$O .= $Self->defang(delete $Self->{DelayedAppendOutput});
|
|
1409 |
$Self->defang(delete $Self->{DelayedAppendOutput}, { add_to_existing => 1 });
|
1316 | 1410 |
}
|
1317 | 1411 |
next;
|
1318 | 1412 |
}
|
|
1321 | 1415 |
warn "defang OutputRemainder" if $Debug;
|
1322 | 1416 |
$I =~ m/\G(.*)$/gcs;
|
1323 | 1417 |
|
1324 | |
$O .= $1 if $1;
|
|
1418 |
$$OutR .= $1 if $1;
|
1325 | 1419 |
|
1326 | 1420 |
# Exit if we got here
|
1327 | 1421 |
last;
|
|
1329 | 1423 |
|
1330 | 1424 |
# If not a recursive call, close mismatched tags
|
1331 | 1425 |
if ($Self->{Reentrant}-- <= 1) {
|
1332 | |
$Self->close_tags(\$O);
|
|
1426 |
$Self->close_all_tags($OutR, \$I);
|
1333 | 1427 |
}
|
1334 | 1428 |
|
1335 | 1429 |
# Turn on utf-8 flag again
|
1336 | |
Encode::_utf8_on($O) if $UTF8Input;
|
1337 | |
|
1338 | |
return $O;
|
|
1430 |
Encode::_utf8_on($$OutR) if $UTF8Input;
|
|
1431 |
|
|
1432 |
return $$OutR;
|
1339 | 1433 |
}
|
1340 | 1434 |
|
1341 | 1435 |
=item I<add_to_output($String)>
|
|
1359 | 1453 |
=back
|
1360 | 1454 |
|
1361 | 1455 |
=cut
|
1362 | |
|
1363 | |
# Callbacks call this method
|
1364 | 1456 |
sub add_to_output {
|
|
1457 |
# Callbacks call this method
|
1365 | 1458 |
my $Self = shift;
|
1366 | 1459 |
$Self->{AppendOutput} = '' if !defined $Self->{AppendOutput};
|
1367 | 1460 |
$Self->{AppendOutput} .= shift;
|
|
1373 | 1466 |
$Self->{DelayedAppendOutput} .= shift;
|
1374 | 1467 |
}
|
1375 | 1468 |
|
|
1469 |
sub track_and_add_tag_to_output {
|
|
1470 |
my $Self = shift;
|
|
1471 |
push @{$Self->{TrackedAppendOutput}}, shift;
|
|
1472 |
}
|
|
1473 |
|
1376 | 1474 |
=item B<INTERNAL METHODS>
|
1377 | 1475 |
|
1378 | 1476 |
Generally these methods never need to be called by users of the class, because they'll be called internally as the appropriate tags are
|
|
1380 | 1478 |
|
1381 | 1479 |
=over 4
|
1382 | 1480 |
|
1383 | |
=item I<defang_script($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle)>
|
|
1481 |
=cut
|
|
1482 |
|
|
1483 |
=item I<defang_script_tag($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle)>
|
1384 | 1484 |
|
1385 | 1485 |
This method is invoked when a <script> tag is parsed. Defangs the <script> opening tag, and any closing tag. Any scripting content is also commented out, so browsers don't display them.
|
1386 | 1486 |
|
|
1435 | 1535 |
=back
|
1436 | 1536 |
|
1437 | 1537 |
=cut
|
1438 | |
|
1439 | |
sub defang_script {
|
|
1538 |
sub defang_script_tag {
|
1440 | 1539 |
my $Self = shift;
|
1441 | |
my ($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle) = @_;
|
|
1540 |
my ($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $lcTag, $TagTrail, $Attributes, $CloseAngle) = @_;
|
1442 | 1541 |
warn "defang_script Processing <script> tag" if $Self->{Debug};
|
1443 | 1542 |
|
1444 | 1543 |
if (!$IsEndTag) {
|
1445 | 1544 |
|
1446 | |
# If we just parsed a starting <script> tag, code better be commented. If
|
1447 | |
# not, we attach comments around the code.
|
|
1545 |
# If we just parsed a starting <script> tag, find up to end tag
|
|
1546 |
# There's all sort of possible mess around this:
|
|
1547 |
# </script<foo> - not really an end tag
|
|
1548 |
# </script foo="bar > yes, still in a attribute"> - a valid end tag
|
|
1549 |
# For weird cases, we end script tag early and end up defanging script
|
|
1550 |
# content as HTML content, which is still safe
|
1448 | 1551 |
if ($$HtmlR =~ m{\G(.*?)(?=</script\b)}gcsi) {
|
1449 | 1552 |
my $ScriptTagContents = $1;
|
1450 | 1553 |
warn "defang_script ScriptTagContents $ScriptTagContents" if $Self->{Debug};
|
1451 | |
$ScriptTagContents =~ s/^(\s*)(<!--)?(.*?)(-->)?(\s*)$/$1<!-- $3 -->$5/s;
|
1452 | |
$Self->add_to_output($ScriptTagContents);
|
1453 | |
|
|
1554 |
if (!$Self->{delete_defang_content}) {
|
|
1555 |
$ScriptTagContents =~ s/<!--|-->|--//g;
|
|
1556 |
$ScriptTagContents = "<!-- " . $ScriptTagContents . " -->";
|
|
1557 |
$Self->add_to_output($ScriptTagContents);
|
|
1558 |
}
|
1454 | 1559 |
}
|
1455 | 1560 |
}
|
1456 | 1561 |
|
|
1458 | 1563 |
return DEFANG_ALWAYS;
|
1459 | 1564 |
}
|
1460 | 1565 |
|
1461 | |
=item I<defang_style($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle, $IsAttr)>
|
1462 | |
|
1463 | |
Builds a list of selectors and declarations from HTML style tags as well as style attributes in HTML tags and calls defang_stylerule() to do the actual defanging.
|
1464 | |
|
1465 | |
Returns 0 to indicate that style tags must not be defanged.
|
1466 | |
|
1467 | |
=over 4
|
1468 | |
|
1469 | |
=item B<Method parameters>
|
1470 | |
|
1471 | |
=over 4
|
1472 | |
|
1473 | |
=item I<$IsAttr>
|
1474 | |
|
1475 | |
Whether we are currently parsing a style attribute or style tag. $IsAttr will be true if we are currently parsing a style attribute.
|
1476 | |
|
1477 | |
=back
|
1478 | |
|
1479 | |
For a description of other parameters, see documentation of defang_script() method
|
1480 | |
|
1481 | |
=back
|
1482 | |
|
1483 | |
=cut
|
1484 | |
|
1485 | |
sub defang_style {
|
1486 | |
|
1487 | |
my ($Self, $OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle, $IsAttr) = @_;
|
1488 | |
my $lcTag = lc $Tag;
|
1489 | |
|
1490 | |
warn "defang_style Tag=$Tag IsEndTag=$IsEndTag IsAttr=$IsAttr" if $Self->{Debug};
|
|
1566 |
sub defang_style_tag {
|
|
1567 |
my ($Self, $OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $lcTag, $TagTrail, $Attributes, $CloseAngle) = @_;
|
|
1568 |
|
|
1569 |
warn "defang_style_tag Tag=$lcTag IsEndTag=$IsEndTag" if $Self->{Debug};
|
|
1570 |
|
|
1571 |
# Defang attributes
|
|
1572 |
my $Defang = $Self->defang_attributes($OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $lcTag, $TagTrail, $Attributes, $CloseAngle);
|
1491 | 1573 |
|
1492 | 1574 |
# Nothing to do if end tag
|
1493 | |
return DEFANG_NONE if !$IsAttr && $IsEndTag;
|
|
1575 |
return $Defang if $IsEndTag;
|
1494 | 1576 |
|
1495 | 1577 |
# Do all style work in byte mode
|
1496 | 1578 |
use bytes;
|
|
1499 | 1581 |
my $ClosingStyleTagPresent = 1;
|
1500 | 1582 |
|
1501 | 1583 |
for ($$HtmlR) {
|
1502 | |
|
1503 | |
if (!$IsAttr) {
|
1504 | |
if (m{\G(.*?)(?=</style\b)}gcis) {
|
1505 | |
$Content = $1;
|
1506 | |
|
1507 | |
# No ending style tag
|
1508 | |
} elsif (m{\G([^<]*)}gcis) {
|
1509 | |
$Content = $1;
|
1510 | |
$ClosingStyleTagPresent = 0;
|
1511 | |
}
|
1512 | |
# Its a style attribute
|
1513 | |
} else {
|
1514 | |
# Avoid undef warning for style attr with no value. eg <tag style>
|
1515 | |
$Content = defined($_) ? $_ : '';
|
1516 | |
}
|
1517 | |
}
|
1518 | |
|
|
1584 |
if (m{\G(.*?)(?=</style\b)}gcis) {
|
|
1585 |
$Content = $1;
|
|
1586 |
|
|
1587 |
# No ending style tag
|
|
1588 |
} elsif (m{\G([^<]*)}gcis) {
|
|
1589 |
$Content = $1;
|
|
1590 |
$ClosingStyleTagPresent = 0;
|
|
1591 |
}
|
|
1592 |
}
|
|
1593 |
|
1519 | 1594 |
# Handle any wrapping HTML comments. If no comments, we add
|
1520 | 1595 |
my ($OpeningHtmlComment, $ClosingHtmlComment) = ('', '');
|
1521 | |
if (!$IsAttr) {
|
1522 | |
$OpeningHtmlComment = $Content =~ s{^(\s*<!--)}{} ? $1 : "<!--";
|
1523 | |
$ClosingHtmlComment = $Content =~ s{(-->\s*)$}{} ? $1 : "-->";
|
1524 | |
}
|
|
1596 |
$OpeningHtmlComment = $Content =~ s{^(\s*<!--)}{} ? $1 . " " : "<!-- ";
|
|
1597 |
$ClosingHtmlComment = $Content =~ s{(-->\s*)$}{} ? " " . $1 : " -->";
|
|
1598 |
|
|
1599 |
# Check for large bogus style data with mostly HTML tags and blat it
|
|
1600 |
if (length $Content > 16384) {
|
|
1601 |
my $TagCount = 0;
|
|
1602 |
$TagCount++ while $Content =~ m{</?\w+\b[^>]*>}g;
|
|
1603 |
if ($TagCount > length($Content)/256) {
|
|
1604 |
$Content = '';
|
|
1605 |
}
|
|
1606 |
}
|
|
1607 |
|
|
1608 |
my $StyleOut = $Self->defang_style_text($Content, $lcTag, 0, undef, $HtmlR, $OutR);
|
|
1609 |
|
|
1610 |
$Self->add_to_output($OpeningHtmlComment . $StyleOut . $ClosingHtmlComment);
|
|
1611 |
$Self->add_to_output("</style>") if !$ClosingStyleTagPresent;
|
|
1612 |
|
|
1613 |
return $Defang;
|
|
1614 |
}
|
|
1615 |
|
|
1616 |
=item I<defang_style_text($Content, $lcTag, $IsAttr, $AttributeHash, $HtmlR, $OutR)>
|
|
1617 |
|
|
1618 |
Defang some raw css data and return the defanged content
|
|
1619 |
|
|
1620 |
=over 4
|
|
1621 |
|
|
1622 |
=item B<Method parameters>
|
|
1623 |
|
|
1624 |
=over 4
|
|
1625 |
|
|
1626 |
=item I<$Content>
|
|
1627 |
|
|
1628 |
The input style string that is defanged.
|
|
1629 |
|
|
1630 |
=item I<$IsAttr>
|
|
1631 |
|
|
1632 |
True if $Content is from an attribute, otherwise from a <style> block
|
|
1633 |
|
|
1634 |
=back
|
|
1635 |
|
|
1636 |
=back
|
|
1637 |
|
|
1638 |
=cut
|
|
1639 |
sub defang_style_text {
|
|
1640 |
my ($Self, $Content, $lcTag, $IsAttr, $AttributeHash, $HtmlR, $OutR) = @_;
|
1525 | 1641 |
|
1526 | 1642 |
# Clean up all comments, expand character escapes and such
|
1527 | 1643 |
$Self->cleanup_style($Content, $IsAttr);
|
|
1532 | 1648 |
warn "defang_style Naked=$Naked" if $Self->{Debug};
|
1533 | 1649 |
|
1534 | 1650 |
# And suitably change the regex to match the data
|
1535 | |
my $SelectorRuleRE = $Naked ? qr/(\s*)()()()($StyleRules)()(\s*)/o :
|
1536 | |
qr/(\s*)((?:$StyleSelectors)?)(\s*)(\{)($StyleRules)(\})(\s*)/o;
|
1537 | |
|
1538 | |
my (@Selectors, @SelectorRules, %ExtraData );
|
|
1651 |
my $SelectorRuleRE = $Naked ? $RECStyleNaked : $RECStyleSelected;
|
|
1652 |
|
|
1653 |
my (@Selectors, @SelectorRules, @ExtraData, @InMedia);
|
1539 | 1654 |
|
1540 | 1655 |
# Now we parse the selectors and declarations
|
1541 | |
while ($Content =~ m{\G.*?$SelectorRuleRE}sgc) {
|
1542 | |
my ($Selector, $SelectorRule) = ($2, $5);
|
1543 | |
last if $Selector eq '' && $SelectorRule eq '';
|
1544 | |
push @Selectors, $Selector;
|
1545 | |
push @SelectorRules, $SelectorRule;
|
1546 | |
warn "defang_style Selector=$Selector" if $Self->{Debug};
|
1547 | |
warn "defang_style SelectorRule=$SelectorRule" if $Self->{Debug};
|
1548 | |
$ExtraData{$Selector} = [ $1, $3, $4, $6, $7];
|
|
1656 |
while (1) {
|
|
1657 |
if ($Content =~ m{$RECStyleMediaSelector}sgco) {
|
|
1658 |
push @InMedia, $2;
|
|
1659 |
push @Selectors, $2;
|
|
1660 |
push @SelectorRules, "";
|
|
1661 |
push @ExtraData, [ $1, "", $3, "", $4 ];
|
|
1662 |
} elsif (@InMedia && $Content =~ m{\G(\s*)(\})(\s*)}sgc) {
|
|
1663 |
pop @InMedia;
|
|
1664 |
push @Selectors, "";
|
|
1665 |
push @SelectorRules, "";
|
|
1666 |
push @ExtraData, [ $1, "", "", $2, $3 ];
|
|
1667 |
} elsif ($Content =~ m{$SelectorRuleRE}sgc) {
|
|
1668 |
my ($Selector, $SelectorRule) = ($2, $5);
|
|
1669 |
last if $Selector eq '' && $SelectorRule =~ /^[;\s]*$/;
|
|
1670 |
$Selector = join("\000", @InMedia, $Selector) if @InMedia;
|
|
1671 |
push @Selectors, $Selector;
|
|
1672 |
push @SelectorRules, $SelectorRule;
|
|
1673 |
push @ExtraData, [ $1, $3, $4, $6, $7];
|
|
1674 |
warn "defang_style Selector=$Selector" if $Self->{Debug};
|
|
1675 |
warn "defang_style SelectorRule=$SelectorRule" if $Self->{Debug};
|
|
1676 |
|
|
1677 |
# Just a large bunch of selectors and no rules, suck up and discard
|
|
1678 |
} elsif (!$Naked && $Content =~ m{\G\s*$StyleSelectors\s*$}sgc) {
|
|
1679 |
# Looks like an html tag, suck up and discard
|
|
1680 |
} elsif (!$Naked && $Content =~ m{\G(?:</?$TagNameRE(?:\s[^>\{\}]*)?>\s*)+}sgc) {
|
|
1681 |
|
|
1682 |
# If content didn't match a rule, suck up whitespace
|
|
1683 |
} elsif ($Content =~ m{\G\s+}sgc) {
|
|
1684 |
# Or any non-whitespace, but try and sync to <...> tags
|
|
1685 |
} elsif ($Content =~ m{\G[^\s<]+}sgc || $Content =~ m{\G<+}sgc) {
|
|
1686 |
|
|
1687 |
# Nothing matched, must be at end
|
|
1688 |
} else { last; }
|
1549 | 1689 |
}
|
1550 | 1690 |
|
1551 | 1691 |
# Check declaration elements for defanging
|
1552 | |
$Self->defang_stylerule(\@Selectors, \@SelectorRules, $lcTag, $IsAttr, $HtmlR, $OutR);
|
|
1692 |
$Self->defang_stylerule(\@Selectors, \@SelectorRules, $lcTag, $IsAttr, $AttributeHash, $HtmlR, $OutR);
|
1553 | 1693 |
|
1554 | 1694 |
my $StyleOut = "";
|
1555 | 1695 |
|
|
1557 | 1697 |
foreach my $Selector (@Selectors) {
|
1558 | 1698 |
|
1559 | 1699 |
my $SelectorRule = shift @SelectorRules;
|
1560 | |
my $Spaces = $ExtraData{$Selector};
|
|
1700 |
my $Spaces = shift @ExtraData;
|
1561 | 1701 |
my ($BeforeSelector, $AfterSelector, $OpenBrace, $CloseBrace, $AfterRule) = @$Spaces if $Spaces;
|
1562 | |
($BeforeSelector, $AfterSelector, $AfterRule) = ("", " ", "\n") unless $ExtraData{$Selector};
|
|
1702 |
($BeforeSelector, $AfterSelector, $AfterRule) = ("", " ", "\n") unless $Spaces;
|
1563 | 1703 |
($OpenBrace, $CloseBrace) = ("{", "}") if !$Spaces && !$IsAttr;
|
1564 | 1704 |
|
1565 | 1705 |
# Put back the rule together
|
1566 | 1706 |
if (defined($Selector)) {
|
1567 | 1707 |
$StyleOut .= $BeforeSelector if defined($BeforeSelector);
|
1568 | |
$StyleOut .= $Selector;
|
|
1708 |
$StyleOut .= $Selector =~ /\000/ ? (reverse split /\000/, $Selector)[0] : $Selector;
|
1569 | 1709 |
$StyleOut .= $AfterSelector if defined($AfterSelector);
|
1570 | 1710 |
$StyleOut .= $OpenBrace if defined($OpenBrace);
|
1571 | 1711 |
$StyleOut .= $SelectorRule if defined($SelectorRule);
|
|
1577 | 1717 |
|
1578 | 1718 |
warn "defang_style StyleOut=$StyleOut" if $Self->{Debug};
|
1579 | 1719 |
|
1580 | |
if ($IsAttr) {
|
1581 | |
$$HtmlR = $StyleOut;
|
1582 | |
|
1583 | |
} else {
|
1584 | |
$Self->add_to_output($OpeningHtmlComment . $StyleOut . $ClosingHtmlComment);
|
1585 | |
$Self->add_to_output("</style>") if !$ClosingStyleTagPresent;
|
1586 | |
}
|
1587 | |
|
1588 | |
# We don't want <style> tags to be defanged
|
1589 | |
return DEFANG_NONE;
|
|
1720 |
return $StyleOut;
|
1590 | 1721 |
}
|
1591 | 1722 |
|
1592 | 1723 |
=item I<cleanup_style($StyleString)>
|
|
1608 | 1739 |
=back
|
1609 | 1740 |
|
1610 | 1741 |
=cut
|
1611 | |
|
1612 | 1742 |
sub cleanup_style {
|
1613 | 1743 |
my $Self = shift;
|
1614 | 1744 |
|
|
1649 | 1779 |
|
1650 | 1780 |
}
|
1651 | 1781 |
|
1652 | |
=item I<defang_stylerule($SelectorsIn, $StyleRules, $lcTag, $IsAttr, $HtmlR, $OutR)>
|
|
1782 |
=item I<defang_stylerule($SelectorsIn, $StyleRules, $lcTag, $IsAttr, $AttributeHash, $HtmlR, $OutR)>
|
1653 | 1783 |
|
1654 | 1784 |
Defangs style data.
|
1655 | 1785 |
|
|
1688 | 1818 |
=back
|
1689 | 1819 |
|
1690 | 1820 |
=cut
|
1691 | |
|
1692 | 1821 |
sub defang_stylerule {
|
1693 | |
|
1694 | |
my ($Self, $SelectorsIn, $StyleRules, $lcTag, $IsAttr, $HtmlR, $OutR) = @_;
|
|
1822 |
my ($Self, $SelectorsIn, $StyleRules, $lcTag, $IsAttr, $AttributeHash, $HtmlR, $OutR) = @_;
|
1695 | 1823 |
|
1696 | 1824 |
my (@SelectorStyleKeyValues, %SelectorStyleKeyExtraData);
|
1697 | 1825 |
|
|
1710 | 1838 |
warn "defang_stylerule Key=$Key Value=$Value Separator=$Separator ValueEnd=$ValueEnd" if $Self->{Debug};
|
1711 | 1839 |
# Store everything except style property and value in a hash
|
1712 | 1840 |
$StyleKeyExtraData{lc $Key} = [$KeyPilot, $Separator, $QuoteStart, $QuoteEnd, $ValueEnd, $ValueTrail];
|
1713 | |
my $DefangStyleRule = DEFANG_DEFAULT;
|
|
1841 |
my $DefangStyleRule = $Self->{defang_default};
|
1714 | 1842 |
|
1715 | 1843 |
# If the style value has a URL in it and URL callback has been supplied, make a url_callback
|
1716 | |
if ($Self->{url_callback} && $Value =~ m/\s*url\(\s*((?:['"])?)(.*?)\1\s*\)/i) {
|
1717 | |
my ($UrlOrig, $Url) = ($2, $2) if $2;
|
1718 | |
warn "defang_stylerule Url found in style property value. Url=$Url" if $Self->{Debug};
|
1719 | |
my $lcAttrKey = $IsAttr ? "style" : undef;
|
1720 | |
$DefangStyleRule = $Self->{url_callback}->($Self->{context}, $Self, $lcTag, $lcAttrKey, \$Url, undef, $HtmlR, $OutR) if $Url;
|
1721 | |
# Save back any changes
|
1722 | |
warn "defang_stylerule After URL callback, Value=$Value DefangStyleRule=$DefangStyleRule" if $Self->{Debug};
|
1723 | |
$Value =~ s{\Q$UrlOrig\E}{$Url} if $UrlOrig;
|
|
1844 |
if ($Self->{url_callback}) {
|
|
1845 |
our $StartPos = 0;
|
|
1846 |
while ($Value =~ m/\G\s*url\(\s*((?:['"])?)(?{ $StartPos = pos; })(.*?)\1\s*\)/gci) {
|
|
1847 |
my ($UrlOrig, $Url) = $2 ? ($2, $2) : ('', '');
|
|
1848 |
my $EndPos = pos($Value);
|
|
1849 |
warn "defang_stylerule Url found in style property value. Url=$Url" if $Self->{Debug};
|
|
1850 |
my $lcAttrKey = $IsAttr ? "style" : undef;
|
|
1851 |
$DefangStyleRule = $Self->{url_callback}->($Self->{context}, $Self, $lcTag, $lcAttrKey, \$Url, $AttributeHash, $HtmlR, $OutR, lc $Key) if $Url;
|
|
1852 |
# Save back any changes
|
|
1853 |
warn "defang_stylerule After URL callback, Value=$Value DefangStyleRule=$DefangStyleRule" if $Self->{Debug};
|
|
1854 |
if ($UrlOrig) {
|
|
1855 |
substr($Value, $StartPos, length($UrlOrig), $Url);
|
|
1856 |
pos($Value) = $EndPos - length($UrlOrig) + length($Url);
|
|
1857 |
}
|
|
1858 |
}
|
1724 | 1859 |
}
|
1725 | 1860 |
|
1726 | 1861 |
# Save the style property, value and defang flag
|
|
1755 | 1890 |
for (my $k = 0; $k < @$KeyValueRules; $k++) {
|
1756 | 1891 |
my ($Key, $Value, $Defang) = @{$KeyValueRules->[$k]};
|
1757 | 1892 |
|
1758 | |
my $v = $ExtraData->{lc $Key};
|
1759 | |
my ($KeyPilot, $Separator, $QuoteStart, $QuoteEnd, $ValueEnd, $ValueTrail) = @{$v || []};
|
1760 | |
|
|
1893 |
my $v = ($ExtraData->{lc $Key} ||= []);
|
|
1894 |
my ($KeyPilot, $Separator, $QuoteStart, $QuoteEnd, $ValueEnd, $ValueTrail) = @$v;
|
|
1895 |
|
|
1896 |
# Always need a separator
|
|
1897 |
$v->[1] = defined $v->[1] ? $v->[1] : ':';
|
1761 | 1898 |
# If an intermediate style property-value pair doesn't have a terminating semi-colon, add it
|
1762 | |
if ($k > 0 && !$v) {
|
1763 | |
my $PreviousRule = $KeyValueRules->[$k - 1];
|
1764 | |
my $PreviousKey = $PreviousRule->[0];
|
1765 | |
my $PrevExtra = $ExtraData->{lc $PreviousKey};
|
1766 | |
$ExtraData->{lc $PreviousKey}->[4] .= ";" if defined($PrevExtra->[4]) && $PrevExtra->[4] !~ m/;/;
|
1767 | |
$ExtraData->{lc $Key}->[1] = ":";
|
|
1899 |
if ($k < @$KeyValueRules - 1) {
|
|
1900 |
$v->[4] .= ";" if !defined $v->[4] || $v->[4] !~ m/;/;
|
1768 | 1901 |
}
|
1769 | 1902 |
|
1770 | 1903 |
}
|
|
1789 | 1922 |
my ($Key, $Value, $Defang) = @$KeyValueRule;
|
1790 | 1923 |
my $v = $ExtraData->{lc $Key};
|
1791 | 1924 |
my ($KeyPilot, $Separator, $QuoteStart, $QuoteEnd, $ValueEnd, $ValueTrail) = @{$v || []};
|
1792 | |
($Separator, $ValueEnd, $ValueTrail) = (":", ";", " ") unless $v;
|
1793 | |
|
|
1925 |
|
1794 | 1926 |
# Flag to defang if a url, expression or unallowed character found
|
1795 | 1927 |
if ($Defang == DEFANG_DEFAULT) {
|
1796 | 1928 |
$Defang = $Value =~ m{^\s*[a-z0-9%!"'`:()#\s.,\/+-]+\s*;?\s*$}i ? DEFANG_NONE : DEFANG_ALWAYS;
|
1797 | |
$Defang = $Value =~ m{^\s*url\s*\(}i ? DEFANG_ALWAYS : $Defang;
|
1798 | |
$Defang = $Value =~ m{^\s*expression\s*\(}i ? DEFANG_ALWAYS : $Defang;
|
|
1929 |
$Defang = $Value =~ m{\burl\s*\(}i ? DEFANG_ALWAYS : $Defang;
|
|
1930 |
$Defang = $Value =~ m{\bexpression\s*\(}i ? DEFANG_ALWAYS : $Defang;
|
1799 | 1931 |
}
|
1800 | 1932 |
|
1801 | 1933 |
($KeyPilot, $Key, $Separator, $QuoteStart, $Value, $QuoteEnd, $ValueEnd, $ValueTrail) =
|
|
1803 | 1935 |
($KeyPilot, $Key, $Separator, $QuoteStart, $Value, $QuoteEnd, $ValueEnd, $ValueTrail);
|
1804 | 1936 |
|
1805 | 1937 |
# Comment out the style property-value pair if $Defang
|
1806 | |
$Key = $Defang != DEFANG_NONE ? "/*" . $Key : $Key;
|
1807 | |
$ValueEnd = $Defang != DEFANG_NONE ? $ValueEnd . "*/" : $ValueEnd;
|
|
1938 |
my $CommentDefang = 0;
|
|
1939 |
if ($Defang != DEFANG_NONE) {
|
|
1940 |
if ($Self->{delete_defang_content}) {
|
|
1941 |
$KeyPilot = $Key = $Separator = $QuoteStart = $Value = $QuoteEnd = $ValueEnd = $ValueTrail = '';
|
|
1942 |
} else {
|
|
1943 |
$CommentDefang = 1;
|
|
1944 |
}
|
|
1945 |
}
|
1808 | 1946 |
|
1809 | 1947 |
# Put the rule together back
|
1810 | 1948 |
if (defined($Key)) {
|
1811 | |
$Rule .= join "", $KeyPilot, $Key, $Separator, $QuoteStart, $Value, $QuoteEnd, $ValueEnd, $ValueTrail;
|
|
1949 |
my $RuleContent = join "", $Key, $Separator, $QuoteStart, $Value, $QuoteEnd, $ValueEnd;
|
|
1950 |
if ($CommentDefang) {
|
|
1951 |
# Strip any mismatched comment markers, then add our own
|
|
1952 |
$RuleContent =~ s{/\*}{}g;
|
|
1953 |
$RuleContent =~ s{\*/}{}g;
|
|
1954 |
$RuleContent = "/*" . $RuleContent . "*/";
|
|
1955 |
}
|
|
1956 |
$Rule .= join "", $KeyPilot, $RuleContent, $ValueTrail;
|
1812 | 1957 |
}
|
1813 | 1958 |
|
1814 | 1959 |
warn "defang_stylerule Rule=$Rule" if $Self->{Debug};
|
|
1832 | 1977 |
|
1833 | 1978 |
=item B<Method parameters>
|
1834 | 1979 |
|
1835 | |
For a description of the method parameters, see documentation of defang_script() method
|
|
1980 |
For a description of the method parameters, see documentation of defang_script_tag() method
|
1836 | 1981 |
|
1837 | 1982 |
=back
|
1838 | 1983 |
|
1839 | 1984 |
=cut
|
1840 | |
|
1841 | 1985 |
sub defang_attributes {
|
1842 | |
my ($Self, $OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagTrail, $Attributes, $CloseAngle) = @_;
|
1843 | |
my $lcTag = lc $Tag;
|
|
1986 |
my ($Self, $OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $lcTag, $TagTrail, $Attributes, $CloseAngle) = @_;
|
1844 | 1987 |
|
1845 | 1988 |
my $Debug = $Self->{Debug};
|
|
1989 |
|
|
1990 |
my $DefangTag = $Self->{defang_default};
|
|
1991 |
my ($DefangTagUrlOverride, $DefangTagAttrOverride);
|
1846 | 1992 |
|
1847 | 1993 |
# Create a key -> \value mapping of all attributes up front
|
1848 | 1994 |
# so we have a complete hash for each callback
|
1849 | |
my %AttributeHash = map { lc($_->[0]) => \$_->[4] } @$Attributes;
|
|
1995 |
my %AttributeHash;
|
|
1996 |
for my $Attr (@$Attributes) {
|
|
1997 |
my ($AttrKey, $AttrValR) = ($Attr->[0], \$Attr->[4]);
|
|
1998 |
|
|
1999 |
my $lcAttrKey = $Attr->[7] = defined $AttrKey ? lc($AttrKey) : "";
|
|
2000 |
|
|
2001 |
# Get the attribute value cleaned up
|
|
2002 |
$$AttrValR = $Self->cleanup_attribute($$AttrValR);
|
|
2003 |
warn "defang_attributes cleaned AttrVal=$$AttrValR" if $Debug;
|
|
2004 |
|
|
2005 |
$AttributeHash{$lcAttrKey} = $AttrValR;
|
|
2006 |
}
|
|
2007 |
|
|
2008 |
# Callback if the tag is in @$tags_to_callback
|
|
2009 |
if (exists($Self->{tags_to_callback}->{$lcTag})) {
|
|
2010 |
warn "defang_attributes Calling tags_callback for $lcTag" if $Debug;
|
|
2011 |
$DefangTag = $Self->{tags_callback}->($Self->{context}, $Self, $OpenAngle, $lcTag, $IsEndTag, \%AttributeHash, $CloseAngle, $HtmlR, $OutR);
|
|
2012 |
}
|
1850 | 2013 |
|
1851 | 2014 |
# Now process each attribute
|
1852 | 2015 |
foreach my $Attr (@$Attributes) {
|
1853 | |
|
1854 | |
# We get the key and value of the attribute
|
1855 | |
my ($AttrKey, $AttrValR) = ($Attr->[0], \$Attr->[4]);
|
1856 | |
my $lcAttrKey = lc $AttrKey;
|
1857 | |
warn "defang_attributes Tag=$Tag AttrKey=$AttrKey AttrVal=$$AttrValR" if $Debug;
|
1858 | |
|
1859 | |
# Get the attribute value cleaned up
|
1860 | |
($$AttrValR, my $AttrValStripped) = $Self->cleanup_attribute($Attr, $AttrKey, $$AttrValR);
|
1861 | |
warn "defang_attributes AttrValStripped=$AttrValStripped" if $Debug;
|
|
2016 |
my ($lcAttrKey, $AttrKey, $AttrValR) = ($Attr->[7], $Attr->[0], \$Attr->[4]);
|
|
2017 |
|
|
2018 |
warn "defang_attributes Tag=$lcTag lcAttrKey=$lcAttrKey AttrVal=$$AttrValR" if $Debug;
|
1862 | 2019 |
|
1863 | 2020 |
my $AttribRule = "";
|
1864 | |
if (ref($Tags{$lcTag})) {
|
|
2021 |
if (ref($Tags{$lcTag}) eq 'HASH') {
|
1865 | 2022 |
$AttribRule = $Tags{$lcTag}{$lcAttrKey};
|
1866 | 2023 |
}
|
1867 | 2024 |
|
1868 | |
my $DefangAttrib = DEFANG_DEFAULT;
|
1869 | |
|
1870 | |
$AttribRule = $CommonAttributes{$lcAttrKey} unless $AttribRule;
|
|
2025 |
my $DefangAttrib = $Self->{defang_default};
|
|
2026 |
|
|
2027 |
$AttribRule ||= $CommonAttributes{$lcAttrKey};
|
1871 | 2028 |
warn "defang_attributes AttribRule=$AttribRule" if $Debug;
|
1872 | 2029 |
|
1873 | 2030 |
# If this is a URL type $AttrKey and URL callback method is supplied, make a url_callback
|
1874 | 2031 |
if ($Self->{url_callback} && $AttribRule && exists($UrlRules{$AttribRule})) {
|
1875 | 2032 |
warn "defang_attributes Making URL callback" if $Debug;
|
1876 | |
$DefangAttrib = $Self->{url_callback}->($Self->{context}, $Self, $lcTag, $lcAttrKey, $AttrValR, \%AttributeHash, $HtmlR, $OutR);
|
|
2033 |
($DefangAttrib, $DefangTagUrlOverride) = $Self->{url_callback}->($Self->{context}, $Self, $lcTag, $lcAttrKey, $AttrValR, \%AttributeHash, $HtmlR, $OutR, { defang_attrib => $DefangAttrib, attrib_rule => $AttribRule });
|
1877 | 2034 |
die "url_callback reset" if !defined pos($$HtmlR);
|
1878 | 2035 |
}
|
1879 | 2036 |
|
1880 | 2037 |
# We have a style attribute, so we call defang_style
|
1881 | 2038 |
if ($lcAttrKey eq "style") {
|
1882 | 2039 |
warn "defang_attributes Found style attribute, calling defang_style" if $Debug;
|
1883 | |
$Self->defang_style($OutR, $AttrValR, $TagOps, $OpenAngle, $IsEndTag, $lcTag, $TagTrail, $Attributes, $CloseAngle, 1);
|
|
2040 |
$$AttrValR = $Self->defang_style_text($$AttrValR, $lcTag, 1, \%AttributeHash, $HtmlR, $OutR);
|
1884 | 2041 |
}
|
1885 | 2042 |
|
1886 | 2043 |
# If a attribute callback is supplied and its interested in this attribute, we make a attribs_callback
|
1887 | 2044 |
if ($Self->{attribs_callback} && exists($Self->{attribs_to_callback}->{$lcAttrKey})) {
|
1888 | |
warn "defang_attributes Making attribute callback for Tag=$Tag AttrKey=$AttrKey" if $Debug;
|
1889 | |
my $DefangResult = $Self->{attribs_callback}->($Self->{context}, $Self, $lcTag, $lcAttrKey, $AttrValR, $HtmlR, $OutR);
|
|
2045 |
warn "defang_attributes Making attribute callback for Tag=$lcTag AttrKey=$AttrKey" if $Debug;
|
|
2046 |
(my $DefangResult, $DefangTagAttrOverride) = $Self->{attribs_callback}->($Self->{context}, $Self, $lcTag, $lcAttrKey, $AttrValR, $HtmlR, $OutR);
|
1890 | 2047 |
# Only use new result if not already DEFANG_ALWAYS from url_callback
|
1891 | 2048 |
$DefangAttrib = $DefangResult if $DefangAttrib != DEFANG_ALWAYS;
|
1892 | 2049 |
}
|
1893 | 2050 |
|
1894 | |
if (($DefangAttrib == DEFANG_DEFAULT) && $AttribRule) {
|
1895 | |
my $Rule = $Rules{$AttribRule};
|
1896 | |
warn "defang_attributes AttribRule=$AttribRule Rule=$Rule" if $Debug;
|
1897 | |
|
1898 | |
# We whitelist the attribute if the value matches the rule
|
1899 | |
if (ref($Rule) eq "Regexp") {
|
1900 | |
$DefangAttrib = ($AttrValStripped =~ $Rule) ? DEFANG_NONE : DEFANG_ALWAYS;
|
1901 | |
}
|
1902 | |
|
1903 | |
# Hack. Ref to array is a blacklist regexp
|
1904 | |
if (ref($Rule) eq "ARRAY") {
|
1905 | |
$DefangAttrib = ($AttrValStripped =~ $Rule->[0]) ? DEFANG_ALWAYS : DEFANG_NONE;
|
1906 | |
}
|
1907 | |
|
1908 | |
} elsif (!$AttribRule) {
|
1909 | |
$DefangAttrib = DEFANG_ALWAYS;
|
1910 | |
}
|
1911 | |
|
1912 | |
warn "defang_attributes DefangAttrib=$DefangAttrib" if $Debug;
|
|
2051 |
# Check if the final attribute value needs defanging
|
|
2052 |
$DefangAttrib = $Self->defang_attribute_value($AttrValR, $AttribRule, $DefangAttrib);
|
1913 | 2053 |
|
1914 | 2054 |
# Store the attribute defang flag
|
1915 | |
push @$Attr, $DefangAttrib if $DefangAttrib != DEFANG_NONE;
|
1916 | |
|
1917 | |
}
|
1918 | |
|
1919 | |
my $DefangTag = DEFANG_DEFAULT;
|
1920 | |
|
1921 | |
# Callback if the tag is in @$tags_to_callback
|
1922 | |
if (exists($Self->{tags_to_callback}->{$lcTag})) {
|
1923 | |
warn "defang_attributes Calling tags_callback for $Tag" if $Debug;
|
1924 | |
$DefangTag = $Self->{tags_callback}->($Self->{context}, $Self, $OpenAngle, $lcTag, $IsEndTag, \%AttributeHash, $CloseAngle, $HtmlR, $OutR);
|
|
2055 |
$Attr->[8] = $DefangAttrib;
|
|
2056 |
|
1925 | 2057 |
}
|
1926 | 2058 |
|
1927 | 2059 |
my @OutputAttributes;
|
1928 | 2060 |
|
1929 | 2061 |
foreach my $Attr (@$Attributes) {
|
1930 | 2062 |
|
1931 | |
my $lcAttr = lc $Attr->[0];
|
|
2063 |
my $lcAttr = $Attr->[7];
|
1932 | 2064 |
|
1933 | 2065 |
# If the attribute is deleted don't output it
|
1934 | 2066 |
unless ($AttributeHash{$lcAttr}) {
|
1935 | |
warn "defang_attributes Marking attribute $Attr->[0] for deletion" if $Debug;
|
|
2067 |
warn "defang_attributes Marking attribute $lcAttr for deletion" if $Debug;
|
1936 | 2068 |
next;
|
1937 | 2069 |
}
|
1938 | 2070 |
|
1939 | 2071 |
# And we attach the defang string here, if the attribute should be defanged
|
1940 | 2072 |
# (attribute could be undef for buggy html, eg <ahref=blah>)
|
1941 | |
$Attr->[0] = $Self->{defang_string}
|
1942 | |
. ( $Attr->[0] || '' )
|
1943 | |
if defined($Attr->[7]) && $Attr->[7] != DEFANG_NONE
|
1944 | |
&& (
|
1945 | |
$Self->{allow_double_defang}
|
1946 | |
|| (
|
1947 | |
substr( ( $Attr->[0] || '' ),
|
1948 | |
0, length( $Self->{defang_string} ) ) ne $Self->{defang_string}
|
1949 | |
)
|
1950 | |
);
|
1951 | |
# Set defang value to undef, or this value will appear in the output
|
1952 | |
$Attr->[7] = undef;
|
|
2073 |
if ($Attr->[8] != DEFANG_NONE) {
|
|
2074 |
if ($Self->{delete_defang_content}) {
|
|
2075 |
@$Attr = ('') x 7;
|
|
2076 |
} else {
|
|
2077 |
$Attr->[0] = $Self->{defang_string} . ($Attr->[0] || '')
|
|
2078 |
if $Self->{allow_double_defang} || ($Attr->[0] || '') !~ $Self->{defang_re};
|
|
2079 |
}
|
|
2080 |
}
|
|
2081 |
|
|
2082 |
# Remove non-attribute values so they're not in the output
|
|
2083 |
splice(@$Attr, 7);
|
1953 | 2084 |
|
1954 | 2085 |
# Requote specials in attribute value
|
1955 | |
my $QuoteRe = $QuoteRe{$Attr->[3]} || $QuoteRe{""};
|
1956 | |
$Attr->[4] =~ s/$QuoteRe/'&'.$CharToEntity{$1}.';'/eg
|
1957 | |
if defined($Attr->[4]);
|
|
2086 |
if (defined $Attr->[4]) {
|
|
2087 |
my $QuoteRe = $QuoteRe{$Attr->[3]} || $QuoteRe{""};
|
|
2088 |
$Attr->[4] =~ s/$CtrlChars//go; # strip ctrls
|
|
2089 |
$Attr->[4] =~ s/$QuoteRe/'&'.$CharToEntity{$1}.';'/eg;
|
|
2090 |
}
|
1958 | 2091 |
|
1959 | 2092 |
# Add to attributes to output
|
1960 | 2093 |
push @OutputAttributes, $Attr;
|
|
1965 | 2098 |
|
1966 | 2099 |
# Append all remaining attribute keys (which must have been newly added attributes by
|
1967 | 2100 |
# the callback)and values in no particular order
|
|
2101 |
my $QuoteRe = $QuoteRe{'"'};
|
1968 | 2102 |
while (my ($Key,$Value) = each %AttributeHash ) {
|
1969 | 2103 |
my $Attr = [" " . $Key, "", "=", '"', $$Value, '"', ""];
|
1970 | 2104 |
if (defined $Attr->[4]) {
|
1971 | |
$Attr->[4] =~ s/(['"<>&])/'&'.$CharToEntity{$1}.';'/eg
|
|
2105 |
$Attr->[4] =~ s/$CtrlChars//g; # strip ctrls
|
|
2106 |
$Attr->[4] =~ s/$QuoteRe/'&'.$CharToEntity{$1}.';'/eg
|
1972 | 2107 |
} else {
|
1973 | 2108 |
@$Attr[2..6] = (undef) x 5;
|
1974 | 2109 |
}
|
|
1982 | 2117 |
if ($DefangTag == DEFANG_DEFAULT && (my $TagOps = $Tags{$lcTag})) {
|
1983 | 2118 |
$DefangTag = DEFANG_NONE;
|
1984 | 2119 |
}
|
1985 | |
|
|
2120 |
# Unless we have overrides
|
|
2121 |
$DefangTag = DEFANG_ALWAYS
|
|
2122 |
if defined($DefangTagUrlOverride) && $DefangTagUrlOverride == DEFANG_ALWAYS;
|
|
2123 |
$DefangTag = DEFANG_ALWAYS
|
|
2124 |
if defined($DefangTagAttrOverride) && $DefangTagAttrOverride == DEFANG_ALWAYS;
|
|
2125 |
|
1986 | 2126 |
return $DefangTag;
|
1987 | 2127 |
}
|
1988 | 2128 |
|
1989 | |
sub track_tags {
|
1990 | |
my ($Self, $OutR, $HtmlR, $TagOps, $OpenAngle, $IsEndTag, $Tag, $TagContentR) = @_;
|
1991 | |
my $lcTag = lc $Tag;
|
1992 | |
|
1993 | |
my ($OpenedTags, $OpenedTagsCount, $OpenedNestedTags)
|
1994 | |
= @$Self{qw(opened_tags opened_tags_count opened_nested_tags)};
|
1995 | |
my $IsTagToFix = $Self->{mismatched_tags_to_fix}->{$lcTag};
|
|
2129 |
sub defang_attribute_value {
|
|
2130 |
my ($Self, $AttrValR, $AttribRule, $DefangAttrib) = @_;
|
|
2131 |
|
|
2132 |
my $Debug = $Self->{Debug};
|
|
2133 |
|
|
2134 |
my $AttrValStripped = $Self->strip_attribute($$AttrValR);
|
|
2135 |
|
|
2136 |
warn "defang_attribute_value AttrVal=$$AttrValR AttrValStripped=$AttrValStripped" if $Debug;
|
|
2137 |
|
|
2138 |
if (($DefangAttrib == DEFANG_DEFAULT) && $AttribRule) {
|
|
2139 |
my $Rule = $Rules{$AttribRule};
|
|
2140 |
warn "defang_attributes AttribRule=$AttribRule Rule=$Rule" if $Debug;
|
|
2141 |
|
|
2142 |
# We whitelist the attribute if the value matches the rule
|
|
2143 |
if (ref($Rule) eq "Regexp") {
|
|
2144 |
$DefangAttrib = ($AttrValStripped =~ $Rule) ? DEFANG_NONE : DEFANG_ALWAYS;
|
|
2145 |
}
|
|
2146 |
|
|
2147 |
# Hack. Ref to array is a blacklist regexp
|
|
2148 |
if (ref($Rule) eq "ARRAY") {
|
|
2149 |
$DefangAttrib = ($AttrValStripped =~ $Rule->[0]) ? DEFANG_ALWAYS : DEFANG_NONE;
|
|
2150 |
}
|
|
2151 |
|
|
2152 |
} elsif (!$AttribRule && $DefangAttrib != DEFANG_NONE) {
|
|
2153 |
$DefangAttrib = DEFANG_ALWAYS;
|
|
2154 |
}
|
|
2155 |
|
|
2156 |
warn "defang_attribute_value DefangAttrib=$DefangAttrib" if $Debug;
|
|
2157 |
|
|
2158 |
return $DefangAttrib;
|
|
2159 |
}
|
|
2160 |
|
|
2161 |
sub track_implicit_tags {
|
|
2162 |
my ($Self, $OutR, $HtmlR, $lcTag, $IsEndTag) = @_;
|
|
2163 |
|
|
2164 |
my $OpenedTags = $Self->{opened_tags};
|
|
2165 |
return if !@$OpenedTags;
|
|
2166 |
|
|
2167 |
# If just closing the last tag, nothing to do
|
|
2168 |
my $LastTag = $OpenedTags->[-1]->[0];
|
|
2169 |
|
|
2170 |
# Are we expecting a particular tag based on last open tag?
|
|
2171 |
if (my $ImplicitTags = $ImplicitOpenTags{$LastTag}) {
|
|
2172 |
|
|
2173 |
# We didn't get a tag we were expecting (eg <table><div> rather
|
|
2174 |
# than <table><tbody><tr><td><div>), so insert opening tags recursively
|
|
2175 |
$LastTag = $lcTag;
|
|
2176 |
while ($ImplicitTags && (!$ImplicitTags->{$LastTag} || $IsEndTag)) {
|
|
2177 |
my $Tag = $ImplicitTags->{default};
|
|
2178 |
# Don't insert implicit tag if it's actually the one we actually just parsed
|
|
2179 |
last if !$IsEndTag && $Tag eq $lcTag;
|
|
2180 |
$Self->open_tag(1, $OutR, $HtmlR, $Tag, \"");
|
|
2181 |
$$OutR .= "<!-- $Tag implicit open due to $LastTag -->" unless $Self->{quiet};
|
|
2182 |
$LastTag = $Tag;
|
|
2183 |
$ImplicitTags = $ImplicitOpenTags{$LastTag};
|
|
2184 |
}
|
|
2185 |
}
|
|
2186 |
}
|
|
2187 |
|
|
2188 |
sub track_in_to_block_tags {
|
|
2189 |
my ($Self, $OutR, $HtmlR, $lcTag) = @_;
|
1996 | 2190 |
|
1997 | 2191 |
# If we've got a block tag, then close any inline tags
|
1998 | 2192 |
# before the block tag. We'll re-opened them again below
|
1999 | |
if ($BlockTags{$lcTag}) {
|
|
2193 |
if ($BlockTags{$lcTag} && !$TableTags{$lcTag}) {
|
|
2194 |
my ($OpenedTags, $ClosedIntoBlockTags, $Quiet)
|
|
2195 |
= @$Self{qw(opened_tags closed_into_block_tags quiet)};
|
|
2196 |
|
2000 | 2197 |
while (@$OpenedTags && $InlineTags{$OpenedTags->[-1]->[0]}) {
|
2001 | |
push @$OpenedNestedTags, my $POTD = $OpenedTags->[-1];
|
2002 | |
$Self->track_tag(1, $POTD->[0]);
|
2003 | |
$$OutR .= "<!-- close inline tag into block --></$POTD->[0]>";
|
2004 | |
}
|
2005 | |
}
|
2006 | |
|
2007 | |
# Track browser implicitly opened tags
|
2008 | |
if (@$OpenedTags) {
|
2009 | |
|
2010 | |
# If just closing the last tag, nothing to do
|
2011 | |
my $LastTag = $OpenedTags->[-1]->[0];
|
2012 | |
if (!$IsEndTag || $lcTag ne $LastTag) {
|
2013 | |
|
2014 | |
# Are we expecting a particular tag based on last open tag?
|
2015 | |
if (my $ImplicitTags = $ImplicitOpenTags{$LastTag}) {
|
2016 | |
|
2017 | |
# We didn't get a tag we were expecting (eg <table><div> rather
|
2018 | |
# than <table><tbody><tr><td><div>), so insert opening tags recursively
|
2019 | |
$LastTag = $lcTag;
|
2020 | |
while ($ImplicitTags && (!$ImplicitTags->{$LastTag} || $IsEndTag)) {
|
2021 | |
my $Tag = $ImplicitTags->{default};
|
2022 | |
# Don't insert implicit tag if it's actually the one we actually just parsed
|
2023 | |
last if !$IsEndTag && $Tag eq $lcTag;
|
2024 | |
$Self->track_tag(0, $Tag, '');
|
2025 | |
$$OutR .= "<!-- $Tag implicit open due to $LastTag --><$Tag>";
|
2026 | |
$LastTag = $Tag;
|
2027 | |
$ImplicitTags = $ImplicitOpenTags{$LastTag};
|
2028 | |
}
|
2029 | |
}
|
2030 | |
}
|
2031 | |
}
|
2032 | |
|
2033 | |
# Check for correctly nest closing tags
|
2034 | |
if ($IsEndTag && $IsTagToFix) {
|
|
2198 |
push @$ClosedIntoBlockTags, my $POTD = $OpenedTags->[-1];
|
|
2199 |
$$OutR .= "<!-- close inline tag into block -->" unless $Quiet;
|
|
2200 |
$Self->close_tag(1, $OutR, $HtmlR, $POTD->[0], 1);
|
|
2201 |
}
|
|
2202 |
}
|
|
2203 |
}
|
|
2204 |
|
|
2205 |
sub track_out_of_block_tags {
|
|
2206 |
my ($Self, $OutR, $HtmlR, $lcTag) = @_;
|
|
2207 |
|
|
2208 |
my $ClosedIntoBlockTags = $Self->{closed_into_block_tags};
|
|
2209 |
return if !@$ClosedIntoBlockTags;
|
|
2210 |
|
|
2211 |
# Re-open inline tags into this block
|
|
2212 |
|
|
2213 |
# Peek ahead. If another block tag, don't do this
|
|
2214 |
return if $$HtmlR =~ m{\G(?=\s*</?($TagNameRE))}gc
|
|
2215 |
&& $BlockTags{lc "$1"} && !$TableTags{lc "$1"};
|
|
2216 |
|
|
2217 |
my ($SpanCount, $SpanAttrs) = (0, '');
|
|
2218 |
while (my $POTD = pop @$ClosedIntoBlockTags) {
|
|
2219 |
|
|
2220 |
# Don't add more than 3 span tags with same attrs in a row
|
|
2221 |
if ($SpanCount < 3) {
|
|
2222 |
# Add after the current tag is output
|
|
2223 |
$Self->track_and_add_tag_to_output($POTD);
|
|
2224 |
$Self->add_to_output("<!-- reopen inline tag after block -->") unless $Self->{quiet};
|
|
2225 |
}
|
|
2226 |
$SpanCount = $POTD->[0] eq 'span' && $POTD->[1] eq $SpanAttrs ? $SpanCount+1 : 0;
|
|
2227 |
$SpanAttrs = $POTD->[1];
|
|
2228 |
}
|
|
2229 |
}
|
|
2230 |
|
|
2231 |
sub open_tag {
|
|
2232 |
my ($Self, $AddOutput, $OutR, $HtmlR, $lcTag, $TagContentR, $NoBlockCheck) = @_;
|
|
2233 |
|
|
2234 |
$Self->track_implicit_tags($OutR, $HtmlR, $lcTag, 0);
|
|
2235 |
$Self->track_in_to_block_tags($OutR, $HtmlR, $lcTag)
|
|
2236 |
unless $NoBlockCheck;
|
|
2237 |
|
|
2238 |
# Track this tag that was opened (and all attributes, so we can re-open with the same if needed)
|
|
2239 |
if ($Self->{mismatched_tags_to_fix}->{$lcTag}) {
|
|
2240 |
push @{$Self->{opened_tags}}, [ $lcTag, $$TagContentR ];
|
|
2241 |
$Self->{opened_tags_count}->{$lcTag}++;
|
|
2242 |
}
|
|
2243 |
if ($AddOutput) {
|
|
2244 |
$$OutR .= "<${lcTag}${$TagContentR}>";
|
|
2245 |
}
|
|
2246 |
|
|
2247 |
$Self->track_out_of_block_tags($OutR, $HtmlR, $lcTag)
|
|
2248 |
unless $NoBlockCheck;
|
|
2249 |
|
|
2250 |
return DEFANG_NONE;
|
|
2251 |
}
|
|
2252 |
|
|
2253 |
my %RECache;
|
|
2254 |
sub close_tag {
|
|
2255 |
my ($Self, $AddOutput, $OutR, $HtmlR, $lcTag, $NoBlockCheck) = @_;
|
|
2256 |
|
|
2257 |
my ($OpenedTags, $OpenedTagsCount, $Quiet)
|
|
2258 |
= @$Self{qw(opened_tags opened_tags_count quiet)};
|
|
2259 |
|
|
2260 |
# If just closing the last tag, nothing to do
|
|
2261 |
if (@$OpenedTags && $OpenedTags->[-1]->[0] ne $lcTag) {
|
|
2262 |
$Self->track_implicit_tags($OutR, $HtmlR, $lcTag, 1);
|
|
2263 |
}
|
|
2264 |
|
|
2265 |
$Self->track_in_to_block_tags($OutR, $HtmlR, $lcTag)
|
|
2266 |
unless $NoBlockCheck;
|
|
2267 |
|
|
2268 |
# Check for correctly nested closing tags
|
|
2269 |
my $IsTagToFix = $Self->{mismatched_tags_to_fix}->{$lcTag};
|
|
2270 |
if ($IsTagToFix) {
|
2035 | 2271 |
my ($Found, $ClosingTags) = (0, '');
|
2036 | 2272 |
|
2037 | 2273 |
# Tag not even open, just defang it
|
|
2052 | 2288 |
}
|
2053 | 2289 |
|
2054 | 2290 |
# Close this mismatched tag
|
2055 | |
$Self->track_tag(1, $PreviousOpenedTag);
|
2056 | |
$$OutR .= "<!-- close mismatched tag --></$PreviousOpenedTag>";
|
2057 | |
}
|
2058 | |
|
2059 | |
# Should have popped stack correctly if found
|
2060 | |
if ($Found) {
|
2061 | |
pop @$OpenedTags;
|
2062 | |
$OpenedTagsCount->{$lcTag}--;
|
|
2291 |
$$OutR .= "<!-- close mismatched tag -->" unless $Quiet;
|
|
2292 |
$Self->close_tag(1, $OutR, $HtmlR, $PreviousOpenedTag);
|
|
2293 |
}
|
2063 | 2294 |
|
2064 | 2295 |
# Otherwise hit tag that stops breaking out, defang it
|
2065 | |
} else {
|
|
2296 |
if (!$Found) {
|
2066 | 2297 |
return DEFANG_ALWAYS;
|
2067 | 2298 |
}
|
2068 | |
|
2069 | |
}
|
2070 | |
|
2071 | |
|
2072 | |
# Track this tag that was opened (and all attributes, so we can re-open with the same if needed)
|
2073 | |
if (!$IsEndTag && $IsTagToFix) {
|
2074 | |
push @$OpenedTags, [ $lcTag, $$TagContentR ];
|
2075 | |
$OpenedTagsCount->{$lcTag}++;
|
2076 | |
}
|
2077 | |
|
2078 | |
# Re-open inline tags into this block
|
2079 | |
if ($BlockTags{$lcTag}) {
|
2080 | |
# Peek ahead. If another block tag, don't do this
|
2081 | |
if ($$HtmlR !~ m{\G(?=\s*</?($TagNameRE))}gc || !$BlockTags{lc "$1"}) {
|
2082 | |
my ($SpanCount, $SpanAttrs) = (0, '');
|
2083 | |
while (my $POTD = pop @$OpenedNestedTags) {
|
2084 | |
|
2085 | |
# Don't add more than 3 span tags with same attrs in a row
|
2086 | |
if ($SpanCount < 3) {
|
2087 | |
# Add after the current tag is output
|
2088 | |
$Self->track_tag(0, @$POTD);
|
2089 | |
$Self->add_to_output("<!-- reopen inline tag after block --><$POTD->[0]$POTD->[1]>");
|
2090 | |
}
|
2091 | |
$SpanCount = $POTD->[0] eq 'span' && $POTD->[1] eq $SpanAttrs ? $SpanCount+1 : 0;
|
2092 | |
$SpanAttrs = $POTD->[1];
|
|
2299 |
}
|
|
2300 |
|
|
2301 |
my $Result = DEFANG_NONE;
|
|
2302 |
|
|
2303 |
if ($IsTagToFix && $Self->{empty_tags_to_collapse}->{$lcTag}) {
|
|
2304 |
|
|
2305 |
# Check if previous output is open tag, or just comments
|
|
2306 |
my $LastTagPos = rindex($$OutR, '<');
|
|
2307 |
while ($LastTagPos >= 0) {
|
|
2308 |
pos($$OutR) = $LastTagPos;
|
|
2309 |
my $RE = (defined $RECache{$lcTag} ? $RECache{$lcTag} : qr/\G<${lcTag}\b[^<>]*>\s*(?:<|$)/);
|
|
2310 |
if ($$OutR =~ /$RE/gc) {
|
|
2311 |
substr($$OutR, $LastTagPos) = '';
|
|
2312 |
($AddOutput, $Result) = (0, DEFANG_ALWAYS);
|
|
2313 |
last;
|
|
2314 |
} elsif ($$OutR =~ /\G<!--(?!KEEP)[^<>]*-->\s*(?:<|$)/gc && $LastTagPos > 0) {
|
|
2315 |
$LastTagPos = rindex($$OutR, '<', $LastTagPos-1);
|
|
2316 |
} else {
|
|
2317 |
last;
|
2093 | 2318 |
}
|
2094 | 2319 |
}
|
2095 | 2320 |
}
|
2096 | 2321 |
|
2097 | |
return DEFANG_NONE;
|
2098 | |
}
|
2099 | |
|
2100 | |
sub track_tag {
|
2101 | |
my ($Self, $IsEndTag, $Tag, $Attr) = @_;
|
2102 | |
my $lcTag = lc $Tag;
|
2103 | |
|
2104 | |
if ($IsEndTag) {
|
|
2322 |
if ($IsTagToFix) {
|
2105 | 2323 |
if ($lcTag eq $Self->{opened_tags}->[-1]->[0]) {
|
2106 | 2324 |
pop @{$Self->{opened_tags}};
|
2107 | 2325 |
$Self->{opened_tags_count}->{$lcTag}--;
|
2108 | 2326 |
} else {
|
2109 | 2327 |
warn "Unexpected tag stack. Expected $lcTag, found " . $Self->{opened_tags}->[-1]->[0];
|
2110 | 2328 |
}
|
2111 | |
} else {
|
2112 | |
push @{$Self->{opened_tags}}, [ $lcTag, $Attr ];
|
2113 | |
$Self->{opened_tags_count}->{$lcTag}++;
|
2114 | |
}
|
|
2329 |
}
|
|
2330 |
if ($AddOutput) {
|
|
2331 |
$$OutR .= "</$lcTag>";
|
|
2332 |
}
|
|
2333 |
|
|
2334 |
$Self->track_out_of_block_tags($OutR, $HtmlR, $lcTag)
|
|
2335 |
unless $NoBlockCheck;
|
|
2336 |
|
|
2337 |
return $Result;
|
2115 | 2338 |
}
|
2116 | 2339 |
|
2117 | |
sub close_tags {
|
2118 | |
my ($Self, $OutR) = @_;
|
|
2340 |
sub close_all_tags {
|
|
2341 |
my ($Self, $OutR, $HtmlR) = @_;
|
2119 | 2342 |
|
2120 | 2343 |
my $RemainingClosingTags = '';
|
2121 | 2344 |
|
2122 | |
my ($OpenedTags, $OpenedTagsCount) = @$Self{qw(opened_tags opened_tags_count)};
|
2123 | |
while (my $PreviousOpenedTag = pop @$OpenedTags) {
|
2124 | |
$RemainingClosingTags .= "<!-- close unclosed tag --></$PreviousOpenedTag->[0]>";
|
2125 | |
$OpenedTagsCount->{$PreviousOpenedTag}--;
|
2126 | |
}
|
2127 | |
$$OutR .= $RemainingClosingTags;
|
|
2345 |
my ($OpenedTags, $OpenedTagsCount, $Quiet)
|
|
2346 |
= @$Self{qw(opened_tags opened_tags_count quiet)};
|
|
2347 |
|
|
2348 |
for (reverse @$OpenedTags) {
|
|
2349 |
$$OutR .= "<!-- close unclosed tag -->" unless $Quiet;
|
|
2350 |
$Self->close_tag(1, $OutR, $HtmlR, $_->[0]);
|
|
2351 |
}
|
2128 | 2352 |
|
2129 | 2353 |
# Also clear implicit tags
|
2130 | 2354 |
$Self->{opened_nested_tags} = [];
|
|
2158 | 2382 |
=back
|
2159 | 2383 |
|
2160 | 2384 |
=cut
|
2161 | |
|
2162 | 2385 |
sub cleanup_attribute {
|
2163 | |
my ($Self, $Attr, $AttrKey, $AttrVal) = @_;
|
|
2386 |
my ($Self, $AttrVal) = @_;
|
2164 | 2387 |
|
2165 | 2388 |
return (undef, '') unless defined($AttrVal);
|
2166 | 2389 |
|
|
2180 | 2403 |
# These get requoted when we output the attribute
|
2181 | 2404 |
$AttrVal =~ s/&(quot|apos|amp|lt|gt);?/$EntityToChar{lc($1)} || warn "no entity for: $1"/egi;
|
2182 | 2405 |
|
2183 | |
my $AttrValStripped = $AttrVal;
|
|
2406 |
# Have to upgrade string to unicode string if entity expansion
|
|
2407 |
# resulted in non-ascii char
|
|
2408 |
utf8::upgrade($AttrVal) if $UnicodeEntity;
|
|
2409 |
|
|
2410 |
warn "cleanup_attribute AttrVal=$AttrVal" if $Self->{Debug};
|
|
2411 |
return $AttrVal;
|
|
2412 |
}
|
|
2413 |
|
|
2414 |
sub strip_attribute {
|
|
2415 |
my ($Self, $AttrVal) = @_;
|
2184 | 2416 |
|
2185 | 2417 |
# In JS, \u000a is unicode char (note \u0072 -> \u0072 -> r, so do HTML entities first)
|
2186 | 2418 |
# This can't be undone, so only do on stripped value
|
2187 | |
$AttrValStripped =~ s/\\u(0?[\da-f]{1,6});?/defined($1) && hex($1) < 1_114_111 && hex($1) != 65535 && !(hex($1) > 55295 && hex($1) < 57344) ? chr(hex($1)) : ""/egi;
|
|
2419 |
$AttrVal =~ s/\\u(0?[\da-f]{1,6});?/defined($1) && hex($1) < 1_114_111 && hex($1) != 65535 && !(hex($1) > 55295 && hex($1) < 57344) ? chr(hex($1)) : ""/egi;
|
2188 | 2420 |
|
2189 | 2421 |
# Also undo URL decoding for "stripped" value
|
2190 | 2422 |
# (can't do this above, because it's non-reversible, eg "http://...?a=%25" => "http://...?a=?",
|
2191 | 2423 |
# how would we know which ? to requote when outputting?)
|
2192 | |
$AttrValStripped =~ s/%([\da-f]{2})/chr(hex($1))/egi;
|
2193 | |
$AttrValStripped =~ s/[\x00-\x19]*//g;
|
2194 | |
$AttrValStripped =~ s/^\x20*//g; # http://ha.ckers.org/xss.html#XSS_Spaces_meta_chars
|
2195 | |
|
2196 | |
# Have to upgrade string to unicode string if entity expansion
|
2197 | |
# resulted in non-ascii char
|
2198 | |
utf8::upgrade($AttrVal) if $UnicodeEntity;
|
2199 | |
|
2200 | |
warn "cleanup_attribute AttrValStripped=$AttrVal" if $Self->{Debug};
|
2201 | |
return ($AttrVal, $AttrValStripped);
|
|
2424 |
$AttrVal =~ s/%([\da-f]{2})/chr(hex($1))/egi;
|
|
2425 |
$AttrVal =~ s/[\x00-\x19]+//g;
|
|
2426 |
$AttrVal =~ s/^[\x20]+//g; # http://ha.ckers.org/xss.html#XSS_Spaces_meta_chars
|
|
2427 |
|
|
2428 |
return $AttrVal;
|
2202 | 2429 |
}
|
2203 | 2430 |
|
2204 | 2431 |
sub get_applicable_charset {
|
|
2245 | 2472 |
|
2246 | 2473 |
=head1 COPYRIGHT AND LICENSE
|
2247 | 2474 |
|
2248 | |
Copyright (C) 2003-2010 by Opera Software Australia Pty Ltd
|
|
2475 |
Copyright (C) 2003-2013 by FastMail Pty Ltd
|
2249 | 2476 |
|
2250 | 2477 |
This library is free software; you can redistribute it and/or modify
|
2251 | 2478 |
it under the same terms as Perl itself.
|