Codebase list libhtml-copy-perl / e4af789
Merge tag 'upstream/1.31' Upstream version 1.31 gregor herrmann 10 years ago
6 changed file(s) with 95 addition(s) and 50 deletion(s). Raw diff Collapse all Expand all
00 Revision history for Perl extension HTML::Copy.
1
2 1.31 2013-06-27
3 * Remove devendencies on Cwd package.
4 * Broken links are not unescaped.
5 * Ignore template toolikit's variables in links.
6 * Fixed spelling mistakes in the document.
7 * Thanks to gregor herrmann.
8 * Fixed failing tests with Perl 5.18.
9 * Thanks to gregor hermman.
110
211 1.3 2008-02-20
312 * HTML::Copy can accept file handles instead file pathes.
0 # http://module-build.sourceforge.net/META-spec.html
1 #XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
2 name: HTML-Copy
3 version: 1.3
4 version_from: lib/HTML/Copy.pm
5 installdirs: site
0 --- #YAML:1.0
1 name: HTML-Copy
2 version: 1.31
3 abstract: ~
4 author: []
5 license: unknown
6 distribution_type: module
7 configure_requires:
8 ExtUtils::MakeMaker: 0
9 build_requires:
10 ExtUtils::MakeMaker: 0
611 requires:
7 Class::Accessor: 0
8 HTML::Parser: 3.4
9 HTTP::Headers: 0
10 Test::More: 0
11 URI: 0
12
13 distribution_type: module
14 generated_by: ExtUtils::MakeMaker version 6.17
12 Class::Accessor: 0
13 HTML::Parser: 3.4
14 HTTP::Headers: 0
15 Test::More: 0
16 URI: 0
17 no_index:
18 directory:
19 - t
20 - inc
21 generated_by: ExtUtils::MakeMaker version 6.56
22 meta-spec:
23 url: http://module-build.sourceforge.net/META-spec-v1.4.html
24 version: 1.4
0 #!perl -w
0 #!/usr/bin/env perl -w
11 use 5.008;
22 use strict;
33 use ExtUtils::MakeMaker;
55 use HTML::Copy;
66 use Getopt::Long;
77 use Pod::Usage;
8 use Cwd;
98
10 our $VERSION = '1.3';
9 our $VERSION = '1.31';
1110
1211 {
1312 my $man = 0;
2019 if (@ARGV > 2) {
2120 pod2usage(-message => 'Too many arguments.',
2221 -exitstatus => 1, -verbose => 1)
23 }
22 }
2423
2524 if (@ARGV < 1) {
2625 pod2usage(-message => 'Required arguments is not given.',
55 use File::Spec;
66 use File::Basename;
77 use File::Path;
8 #use Cwd;
98 use utf8;
109 use Encode;
1110 use Encode::Guess;
1211 use Carp;
13 #use Data::Dumper;
1412
1513 use HTML::Parser 3.40;
1614 use HTML::HeadParser;
1715 use URI::file;
1816
19 use base qw(HTML::Parser Class::Accessor);
17 use base qw(HTML::Parser Class::Accessor::Fast);
2018
2119 __PACKAGE__->mk_accessors(qw(link_attributes
2220 has_base));
2321
2422 #use Data::Dumper;
2523
26 our @default_link_attributes = ('src', 'href', 'background', 'csref', 'livesrc');
27 # 'livesrc' and 'csref' are uesed in Adobe GoLive
24 our @default_link_attributes = ('src', 'href', 'background', 'csref', 'livesrc', 'user');
25 # 'livesrc', 'user' and 'csref' are uesed in Adobe GoLive
2826
2927 =head1 NAME
3028
3230
3331 =head1 VERSION
3432
35 Version 1.3
36
37 =cut
38
39 our $VERSION = '1.3';
33 Version 1.31
34
35 =cut
36
37 our $VERSION = '1.31';
4038
4139 =head1 SYMPOSIS
4240
111109
112110 $p = HTML::Copy->new($source);
113111
114 Make an instance of this module with specifing a source of HTML.
112 Make an instance of this module with specifying a source of HTML.
115113
116114 The argument $source can be a file path or a file handle. When a file handle is passed, you may need to indicate a file path of the passed file handle by the method L<"source_path">. If calling L<"source_path"> is omitted, it is assumed that the location of the file handle is the current working directory.
117115
126124 @$self{@keys} = @args{@keys};
127125 } else {
128126 my $file = shift @_;
129 if (!ref($file) && (ref(\$file) ne "GLOB")) {
127 my $ref = ref($file);
128 if ($ref =~ /^Path::Class::File/) {
129 $self->source_path($file);
130 } elsif (! $ref && (ref(\$file) ne 'GLOB')) {
130131 $self->source_path($file);
131132 } else {
132133 $self->source_handle($file);
135136
136137 $self->link_attributes(\@default_link_attributes);
137138 $self->has_base(0);
138
139 $self->attr_encoded(1);
139140 return $self;
140141 }
141142
249250 if ($self->{'encoding'}) {
250251 return $self->{'encoding'};
251252 }
252
253253 my $in = $self->source_handle;
254254 my $data = do {local $/; <$in>;};
255255 my $p = HTML::HeadParser->new;
288288 $p->io_layer;
289289 $p->io_layer(':utf8');
290290
291 Get and set PerlIO layer to read the source path and to write the destination path. Usualy it was automatically determined by $source_path's charset tag. If charset is not specified, Encode::Guess module will be used.
291 Get and set PerlIO layer to read the source path and to write the destination path. Usually it was automatically determined by $source_path's charset tag. If charset is not specified, Encode::Guess module will be used.
292292
293293 =cut
294294
311311 @suspects = $p->encode_sustects;
312312 $p->encode_suspects(qw/shiftjis euc-jp/);
313313
314 Add suspects of text encoding to guess the text encoding of the source HTML. If the source HTML have charset tag, it is not requred to add suspects.
314 Add suspects of text encoding to guess the text encoding of the source HTML. If the source HTML have charset tag, it is not required to add suspects.
315315
316316 =cut
317317
360360
361361 sub declaration { $_[0]->output("<!$_[1]>") }
362362 sub process { $_[0]->output($_[2]) }
363 sub comment { $_[0]->output("<!--$_[1]-->") }
364363 sub end { $_[0]->output($_[2]) }
365364 sub text { $_[0]->output($_[1]) }
365
366 sub comment {
367 my ($self, $comment) = @_;
368 if ($comment =~ /InstanceBegin template="([^"]+)"/) {
369 my $uri = URI->new($1);
370 my $newlink = $self->change_link($uri);
371 $comment = " InstanceBegin template=\"$newlink\" ";
372 }
373
374 $self->output("<!--$comment-->");
375 }
376
377 sub process_link {
378 my ($self, $link_path)= @_;
379 return undef if ($link_path =~ /^\$/);
380 return undef if ($link_path =~ /^\[%.*%\]$/);
381 my $uri = URI->new($link_path);
382 return undef if ($uri->scheme);
383 return $self->change_link($uri);
384 }
366385
367386 sub start {
368387 my ($self, $tag, $attr_dict, $attr_names, $tag_text) = @_;
375394 my $is_changed = 0;
376395 foreach my $an_attr (@{$self->link_attributes}) {
377396 if (exists($attr_dict->{$an_attr})){
378 my $link_path = $attr_dict->{$an_attr};
379 next if ($link_path =~ /^\$/);
380 my $uri = URI->new($link_path);
381 next if ($uri->scheme);
397 my $newlink = $self->process_link($attr_dict->{$an_attr});
398 next unless ($newlink);
399 $attr_dict->{$an_attr} = $newlink;
382400 $is_changed = 1;
383 $attr_dict->{$an_attr} = $self->change_link($uri);
384401 }
385402 }
386
403
404 if ($tag eq 'param') {
405 if ($attr_dict->{'name'} eq 'src') {
406 my $newlink = $self->process_link($attr_dict->{'value'});
407 if ($newlink) {
408 $attr_dict->{'value'} = $newlink;
409 $is_changed = 1;
410 }
411 }
412 }
413
387414 if ($is_changed) {
388415 my $attrs_text = $self->build_attributes($attr_dict, $attr_names);
389416 $tag_text = "<$tag $attrs_text>";
461488 $result_uri = $abs_uri->rel($self->destination_uri);
462489 } else {
463490 warn("$abs_path is not found.\nThe link to this path is not changed.\n");
464 $result_uri = $uri;
491 return "";
465492 }
466493
467494 return $result_uri->as_string;
477504
478505 if (@_) {
479506 $self->{'source_handle'} = shift @_;
480 }
481 elsif (!$self->{'source_handle'}) {
482 my $path = $self->source_path or croak "source_paht is undefined.";
507 } elsif (!$self->{'source_handle'}) {
508 my $path = $self->source_path or croak "source_path is undefined.";
483509 open my $in, "<", $path or croak "Can't open $path.";
484510 $self->{'source_handle'} = $in;
485511 }
55 use utf8;
66 use File::Spec::Functions;
77 #use Data::Dumper;
8 use Encode qw(encode_utf8 decode_utf8);
89
910 use Test::More tests => 16;
1011
108109 ok($copy_html eq $result_html_nocharset, "copy_to no charset shift_jis");
109110
110111 ##== HTML with charset uft-8
111 my $src_html_utf8 = <<EOT;
112 my $src_html_utf8 = encode_utf8(<<EOT);
112113 <!DOCTYPE html>
113114 <html>
114115 <head>
125126 </html>
126127 EOT
127128
128 my $result_html_utf8 = <<EOT;
129 my $result_html_utf8 = encode_utf8(<<EOT);
129130 <!DOCTYPE html>
130131 <html>
131132 <head>
173174 read_and_unlink($destination, $p);
174175 };
175176
176 ok($copy_html eq $result_html_utf8, "copy_to giviing a file handle");
177 ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving a file handle");
177178
178179 ##=== copy_to gving file handles for input and output
179180 $copy_html = do {
186187 Encode::decode($p->encoding, $outdata);
187188 };
188189
189 ok($copy_html eq $result_html_utf8, "copy_to giviing file handles for input and output");
190 ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving file handles for input and output");
190191
191192 ##=== parse_to giving a file handle
192193 $copy_html = do {
195196 $p->parse_to($destination);
196197 };
197198
198 ok($copy_html eq $result_html_utf8, "copy_to giviing file handles for input and output");
199 ok($copy_html eq decode_utf8($result_html_utf8), "copy_to giving file handles for input and output");
199200
200201 ##=== copy_to with directory destination
201202 $copy_html = do {