5 | 5 |
use File::Spec;
|
6 | 6 |
use File::Basename;
|
7 | 7 |
use File::Path;
|
8 | |
#use Cwd;
|
9 | 8 |
use utf8;
|
10 | 9 |
use Encode;
|
11 | 10 |
use Encode::Guess;
|
12 | 11 |
use Carp;
|
13 | |
#use Data::Dumper;
|
14 | 12 |
|
15 | 13 |
use HTML::Parser 3.40;
|
16 | 14 |
use HTML::HeadParser;
|
17 | 15 |
use URI::file;
|
18 | 16 |
|
19 | |
use base qw(HTML::Parser Class::Accessor);
|
|
17 |
use base qw(HTML::Parser Class::Accessor::Fast);
|
20 | 18 |
|
21 | 19 |
__PACKAGE__->mk_accessors(qw(link_attributes
|
22 | 20 |
has_base));
|
23 | 21 |
|
24 | 22 |
#use Data::Dumper;
|
25 | 23 |
|
26 | |
our @default_link_attributes = ('src', 'href', 'background', 'csref', 'livesrc');
|
27 | |
# 'livesrc' and 'csref' are uesed in Adobe GoLive
|
|
24 |
our @default_link_attributes = ('src', 'href', 'background', 'csref', 'livesrc', 'user');
|
|
25 |
# 'livesrc', 'user' and 'csref' are uesed in Adobe GoLive
|
28 | 26 |
|
29 | 27 |
=head1 NAME
|
30 | 28 |
|
|
32 | 30 |
|
33 | 31 |
=head1 VERSION
|
34 | 32 |
|
35 | |
Version 1.3
|
36 | |
|
37 | |
=cut
|
38 | |
|
39 | |
our $VERSION = '1.3';
|
|
33 |
Version 1.31
|
|
34 |
|
|
35 |
=cut
|
|
36 |
|
|
37 |
our $VERSION = '1.31';
|
40 | 38 |
|
41 | 39 |
=head1 SYMPOSIS
|
42 | 40 |
|
|
111 | 109 |
|
112 | 110 |
$p = HTML::Copy->new($source);
|
113 | 111 |
|
114 | |
Make an instance of this module with specifing a source of HTML.
|
|
112 |
Make an instance of this module with specifying a source of HTML.
|
115 | 113 |
|
116 | 114 |
The argument $source can be a file path or a file handle. When a file handle is passed, you may need to indicate a file path of the passed file handle by the method L<"source_path">. If calling L<"source_path"> is omitted, it is assumed that the location of the file handle is the current working directory.
|
117 | 115 |
|
|
126 | 124 |
@$self{@keys} = @args{@keys};
|
127 | 125 |
} else {
|
128 | 126 |
my $file = shift @_;
|
129 | |
if (!ref($file) && (ref(\$file) ne "GLOB")) {
|
|
127 |
my $ref = ref($file);
|
|
128 |
if ($ref =~ /^Path::Class::File/) {
|
|
129 |
$self->source_path($file);
|
|
130 |
} elsif (! $ref && (ref(\$file) ne 'GLOB')) {
|
130 | 131 |
$self->source_path($file);
|
131 | 132 |
} else {
|
132 | 133 |
$self->source_handle($file);
|
|
135 | 136 |
|
136 | 137 |
$self->link_attributes(\@default_link_attributes);
|
137 | 138 |
$self->has_base(0);
|
138 | |
|
|
139 |
$self->attr_encoded(1);
|
139 | 140 |
return $self;
|
140 | 141 |
}
|
141 | 142 |
|
|
249 | 250 |
if ($self->{'encoding'}) {
|
250 | 251 |
return $self->{'encoding'};
|
251 | 252 |
}
|
252 | |
|
253 | 253 |
my $in = $self->source_handle;
|
254 | 254 |
my $data = do {local $/; <$in>;};
|
255 | 255 |
my $p = HTML::HeadParser->new;
|
|
288 | 288 |
$p->io_layer;
|
289 | 289 |
$p->io_layer(':utf8');
|
290 | 290 |
|
291 | |
Get and set PerlIO layer to read the source path and to write the destination path. Usualy it was automatically determined by $source_path's charset tag. If charset is not specified, Encode::Guess module will be used.
|
|
291 |
Get and set PerlIO layer to read the source path and to write the destination path. Usually it was automatically determined by $source_path's charset tag. If charset is not specified, Encode::Guess module will be used.
|
292 | 292 |
|
293 | 293 |
=cut
|
294 | 294 |
|
|
311 | 311 |
@suspects = $p->encode_sustects;
|
312 | 312 |
$p->encode_suspects(qw/shiftjis euc-jp/);
|
313 | 313 |
|
314 | |
Add suspects of text encoding to guess the text encoding of the source HTML. If the source HTML have charset tag, it is not requred to add suspects.
|
|
314 |
Add suspects of text encoding to guess the text encoding of the source HTML. If the source HTML have charset tag, it is not required to add suspects.
|
315 | 315 |
|
316 | 316 |
=cut
|
317 | 317 |
|
|
360 | 360 |
|
361 | 361 |
sub declaration { $_[0]->output("<!$_[1]>") }
|
362 | 362 |
sub process { $_[0]->output($_[2]) }
|
363 | |
sub comment { $_[0]->output("<!--$_[1]-->") }
|
364 | 363 |
sub end { $_[0]->output($_[2]) }
|
365 | 364 |
sub text { $_[0]->output($_[1]) }
|
|
365 |
|
|
366 |
sub comment {
|
|
367 |
my ($self, $comment) = @_;
|
|
368 |
if ($comment =~ /InstanceBegin template="([^"]+)"/) {
|
|
369 |
my $uri = URI->new($1);
|
|
370 |
my $newlink = $self->change_link($uri);
|
|
371 |
$comment = " InstanceBegin template=\"$newlink\" ";
|
|
372 |
}
|
|
373 |
|
|
374 |
$self->output("<!--$comment-->");
|
|
375 |
}
|
|
376 |
|
|
377 |
sub process_link {
|
|
378 |
my ($self, $link_path)= @_;
|
|
379 |
return undef if ($link_path =~ /^\$/);
|
|
380 |
return undef if ($link_path =~ /^\[%.*%\]$/);
|
|
381 |
my $uri = URI->new($link_path);
|
|
382 |
return undef if ($uri->scheme);
|
|
383 |
return $self->change_link($uri);
|
|
384 |
}
|
366 | 385 |
|
367 | 386 |
sub start {
|
368 | 387 |
my ($self, $tag, $attr_dict, $attr_names, $tag_text) = @_;
|
|
375 | 394 |
my $is_changed = 0;
|
376 | 395 |
foreach my $an_attr (@{$self->link_attributes}) {
|
377 | 396 |
if (exists($attr_dict->{$an_attr})){
|
378 | |
my $link_path = $attr_dict->{$an_attr};
|
379 | |
next if ($link_path =~ /^\$/);
|
380 | |
my $uri = URI->new($link_path);
|
381 | |
next if ($uri->scheme);
|
|
397 |
my $newlink = $self->process_link($attr_dict->{$an_attr});
|
|
398 |
next unless ($newlink);
|
|
399 |
$attr_dict->{$an_attr} = $newlink;
|
382 | 400 |
$is_changed = 1;
|
383 | |
$attr_dict->{$an_attr} = $self->change_link($uri);
|
384 | 401 |
}
|
385 | 402 |
}
|
386 | |
|
|
403 |
|
|
404 |
if ($tag eq 'param') {
|
|
405 |
if ($attr_dict->{'name'} eq 'src') {
|
|
406 |
my $newlink = $self->process_link($attr_dict->{'value'});
|
|
407 |
if ($newlink) {
|
|
408 |
$attr_dict->{'value'} = $newlink;
|
|
409 |
$is_changed = 1;
|
|
410 |
}
|
|
411 |
}
|
|
412 |
}
|
|
413 |
|
387 | 414 |
if ($is_changed) {
|
388 | 415 |
my $attrs_text = $self->build_attributes($attr_dict, $attr_names);
|
389 | 416 |
$tag_text = "<$tag $attrs_text>";
|
|
461 | 488 |
$result_uri = $abs_uri->rel($self->destination_uri);
|
462 | 489 |
} else {
|
463 | 490 |
warn("$abs_path is not found.\nThe link to this path is not changed.\n");
|
464 | |
$result_uri = $uri;
|
|
491 |
return "";
|
465 | 492 |
}
|
466 | 493 |
|
467 | 494 |
return $result_uri->as_string;
|
|
477 | 504 |
|
478 | 505 |
if (@_) {
|
479 | 506 |
$self->{'source_handle'} = shift @_;
|
480 | |
}
|
481 | |
elsif (!$self->{'source_handle'}) {
|
482 | |
my $path = $self->source_path or croak "source_paht is undefined.";
|
|
507 |
} elsif (!$self->{'source_handle'}) {
|
|
508 |
my $path = $self->source_path or croak "source_path is undefined.";
|
483 | 509 |
open my $in, "<", $path or croak "Can't open $path.";
|
484 | 510 |
$self->{'source_handle'} = $in;
|
485 | 511 |
}
|