Codebase list libfurl-perl / 7c52fea
Added Furl::Response#encoding tokuhirom 11 years ago
8 changed file(s) with 169 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
1010 requires 'Test::More' => 0.96; # done_testing, subtest
1111 requires 'Test::TCP' => 1.06;
1212 requires 'Test::Requires';
13 requires 'Test::Fake::HTTPD';
1314 };
1415
1516 on configure => sub {
7878 sub is_success { substr( $_[0]->code, 0, 1 ) eq '2' }
7979 sub status_line { $_[0]->code . ' ' . $_[0]->message }
8080
81 sub charset {
82 my $self = shift;
83
84 return $self->{__charset} if exists $self->{__charset};
85 if ($self->can('content_charset')){
86 # To suppress:
87 # Parsing of undecoded UTF-8 will give garbage when decoding entities
88 local $SIG{__WARN__} = sub {};
89 my $charset = $self->content_charset;
90 $self->{__charset} = $charset;
91 return $charset;
92 }
93
94 my $content_type = $self->headers->header('Content-Type');
95 return unless $content_type;
96 $content_type =~ /charset=([A-Za-z0-9_\-]+)/io;
97 $self->{__charset} = $1 || undef;
98
99 # Detect charset from HTML
100 unless (defined($self->{__charset}) && $self->content_type =~ m{text/html}) {
101 # I guess, this is not so perfect regexp. patches welcome.
102 #
103 # <meta http-equiv="Content-Type" content="text/html; charset=EUC-JP"/>
104 $self->content =~ m!<meta\s+http-equiv\s*=["']Content-Type["']\s+content\s*=\s*["']text/html;\s*charset=([^'">/]+)['"]\s*/?>!smi;
105 $self->{__charset} = $1;
106 }
107
108 $self->{__charset};
109 }
110
111 sub encoder {
112 require Encode;
113 my $self = shift;
114 return $self->{__encoder} if exists $self->{__encoder};
115 my $charset = $self->charset or return;
116 my $enc = Encode::find_encoding($charset);
117 $self->{__encoder} = $enc;
118 }
119
120 sub encoding {
121 my $enc = shift->encoder or return;
122 $enc->name;
123 }
124
81125 1;
82126 __END__
83127
0 #!perl
1
2 use strict;
3 use warnings;
4 use Furl;
5 use File::Spec;
6 use Encode;
7 use Cwd;
8 use URI;
9 use Test::More tests => 13;
10 use Test::TCP;
11 use Test::Fake::HTTPD;
12
13 my $ua = Furl->new;
14 my $cwd = getcwd;
15
16 #BEGIN{
17 # package LWP::Protocol;
18 # $^W = 0;
19 #}
20
21 my $httpd = run_http_server {
22 my $req = shift;
23 my $path = 't/400_components/001_response-coding' . $req->uri->path;
24 open my $fh, '<', $path or die "$path: $!";
25 return [ 200, [ 'Content-Type' => 'text/html' ], $fh ];
26 };
27 note $httpd->host_port;
28
29 for my $meth (qw/charset encoder encoding decoded_content/){
30 can_ok('Furl::Response', $meth);
31 }
32
33 my %charset = qw(
34 UTF-8 utf-8-strict;
35 EUC-JP EUC-JP
36 Shift_JIS SHIFT_JIS
37 ISO-2022-JP ISO-2022-JP
38 );
39
40 my %filename = qw(
41 UTF-8 t-utf-8.html
42 EUC-JP t-euc-jp.html
43 Shift_JIS t-shiftjis.html
44 ISO-2022-JP t-iso-2022-jp.html
45 );
46
47 for my $charset (sort keys %charset){
48 my $uri = URI->new('http://' . $httpd->host_port);
49 $uri->path(File::Spec->catfile($filename{$charset}));
50 my $res;
51 {
52 local $^W = 0; # to quiet LWP::Protocol
53 $res = $ua->get($uri);
54 }
55 die unless $res->is_success;
56 is $res->charset, $charset, "\$res->charset eq '$charset'";
57 my $canon = find_encoding($charset)->name;
58 is $res->encoding, $canon, "\$res->encoding eq '$canon'";
59 }
60
61 my $uri = URI->new('http://' . $httpd->host_port);
62 $uri->path("t-null.html");
63 my $res = $ua->get($uri);
64 die unless $res->is_success;
65 if (defined $res->encoding){
66 is $res->encoding, "ascii", "res->encoding is ascii";
67 }else{
68 ok !$res->encoding, "res->encoding is undef";
69 }
0 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
1 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
2 <html xmlns="http://www.w3.org/1999/xhtml">
3 <head>
4 <meta http-equiv="Content-Type" content="text/html; charset=EUC-JP"/>
5 <title>Test</title>
6 </head>
7 <body>
8 <p>漢字、カタカナ、ひらがなの入ったhtml.</p>
9 </body>
10 </html>
0 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
1 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
2 <html xmlns="http://www.w3.org/1999/xhtml">
3 <head>
4 <meta http-equiv="Content-Type" content="text/html; charset=ISO-2022-JP"/>
5 <title>Test</title>
6 </head>
7 <body>
8 <p>$B4A;z!"%+%?%+%J!"$R$i$,$J$NF~$C$?(Bhtml.</p>
9 </body>
10 </html>
0 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
1 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
2 <html xmlns="http://www.w3.org/1999/xhtml">
3 <head>
4 <title>Test</title>
5 </head>
6 <body>
7 <p>The quick brown fox jumps over the black lazy dog.</p>
8 </body>
9 </html>
0 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
1 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
2 <html xmlns="http://www.w3.org/1999/xhtml">
3 <head>
4 <meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS"/>
5 <title>Test</title>
6 </head>
7 <body>
8 <p>漢字、カタカナ、ひらがなの入ったhtml.</p>
9 </body>
10 </html>
0 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
1 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
2 <html xmlns="http://www.w3.org/1999/xhtml">
3 <head>
4 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
5 <title>Test</title>
6 </head>
7 <body>
8 <p>漢字、カタカナ、ひらがなの入ったhtml.</p>
9 </body>
10 </html>