Added Furl::Response#encoding
tokuhirom
11 years ago
10 | 10 | requires 'Test::More' => 0.96; # done_testing, subtest |
11 | 11 | requires 'Test::TCP' => 1.06; |
12 | 12 | requires 'Test::Requires'; |
13 | requires 'Test::Fake::HTTPD'; | |
13 | 14 | }; |
14 | 15 | |
15 | 16 | on configure => sub { |
78 | 78 | sub is_success { substr( $_[0]->code, 0, 1 ) eq '2' } |
79 | 79 | sub status_line { $_[0]->code . ' ' . $_[0]->message } |
80 | 80 | |
81 | sub charset { | |
82 | my $self = shift; | |
83 | ||
84 | return $self->{__charset} if exists $self->{__charset}; | |
85 | if ($self->can('content_charset')){ | |
86 | # To suppress: | |
87 | # Parsing of undecoded UTF-8 will give garbage when decoding entities | |
88 | local $SIG{__WARN__} = sub {}; | |
89 | my $charset = $self->content_charset; | |
90 | $self->{__charset} = $charset; | |
91 | return $charset; | |
92 | } | |
93 | ||
94 | my $content_type = $self->headers->header('Content-Type'); | |
95 | return unless $content_type; | |
96 | $content_type =~ /charset=([A-Za-z0-9_\-]+)/io; | |
97 | $self->{__charset} = $1 || undef; | |
98 | ||
99 | # Detect charset from HTML | |
100 | unless (defined($self->{__charset}) && $self->content_type =~ m{text/html}) { | |
101 | # I guess, this is not so perfect regexp. patches welcome. | |
102 | # | |
103 | # <meta http-equiv="Content-Type" content="text/html; charset=EUC-JP"/> | |
104 | $self->content =~ m!<meta\s+http-equiv\s*=["']Content-Type["']\s+content\s*=\s*["']text/html;\s*charset=([^'">/]+)['"]\s*/?>!smi; | |
105 | $self->{__charset} = $1; | |
106 | } | |
107 | ||
108 | $self->{__charset}; | |
109 | } | |
110 | ||
111 | sub encoder { | |
112 | require Encode; | |
113 | my $self = shift; | |
114 | return $self->{__encoder} if exists $self->{__encoder}; | |
115 | my $charset = $self->charset or return; | |
116 | my $enc = Encode::find_encoding($charset); | |
117 | $self->{__encoder} = $enc; | |
118 | } | |
119 | ||
120 | sub encoding { | |
121 | my $enc = shift->encoder or return; | |
122 | $enc->name; | |
123 | } | |
124 | ||
81 | 125 | 1; |
82 | 126 | __END__ |
83 | 127 |
0 | #!perl | |
1 | ||
2 | use strict; | |
3 | use warnings; | |
4 | use Furl; | |
5 | use File::Spec; | |
6 | use Encode; | |
7 | use Cwd; | |
8 | use URI; | |
9 | use Test::More tests => 13; | |
10 | use Test::TCP; | |
11 | use Test::Fake::HTTPD; | |
12 | ||
13 | my $ua = Furl->new; | |
14 | my $cwd = getcwd; | |
15 | ||
16 | #BEGIN{ | |
17 | # package LWP::Protocol; | |
18 | # $^W = 0; | |
19 | #} | |
20 | ||
21 | my $httpd = run_http_server { | |
22 | my $req = shift; | |
23 | my $path = 't/400_components/001_response-coding' . $req->uri->path; | |
24 | open my $fh, '<', $path or die "$path: $!"; | |
25 | return [ 200, [ 'Content-Type' => 'text/html' ], $fh ]; | |
26 | }; | |
27 | note $httpd->host_port; | |
28 | ||
29 | for my $meth (qw/charset encoder encoding decoded_content/){ | |
30 | can_ok('Furl::Response', $meth); | |
31 | } | |
32 | ||
33 | my %charset = qw( | |
34 | UTF-8 utf-8-strict; | |
35 | EUC-JP EUC-JP | |
36 | Shift_JIS SHIFT_JIS | |
37 | ISO-2022-JP ISO-2022-JP | |
38 | ); | |
39 | ||
40 | my %filename = qw( | |
41 | UTF-8 t-utf-8.html | |
42 | EUC-JP t-euc-jp.html | |
43 | Shift_JIS t-shiftjis.html | |
44 | ISO-2022-JP t-iso-2022-jp.html | |
45 | ); | |
46 | ||
47 | for my $charset (sort keys %charset){ | |
48 | my $uri = URI->new('http://' . $httpd->host_port); | |
49 | $uri->path(File::Spec->catfile($filename{$charset})); | |
50 | my $res; | |
51 | { | |
52 | local $^W = 0; # to quiet LWP::Protocol | |
53 | $res = $ua->get($uri); | |
54 | } | |
55 | die unless $res->is_success; | |
56 | is $res->charset, $charset, "\$res->charset eq '$charset'"; | |
57 | my $canon = find_encoding($charset)->name; | |
58 | is $res->encoding, $canon, "\$res->encoding eq '$canon'"; | |
59 | } | |
60 | ||
61 | my $uri = URI->new('http://' . $httpd->host_port); | |
62 | $uri->path("t-null.html"); | |
63 | my $res = $ua->get($uri); | |
64 | die unless $res->is_success; | |
65 | if (defined $res->encoding){ | |
66 | is $res->encoding, "ascii", "res->encoding is ascii"; | |
67 | }else{ | |
68 | ok !$res->encoding, "res->encoding is undef"; | |
69 | } |
0 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
1 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
2 | <html xmlns="http://www.w3.org/1999/xhtml"> | |
3 | <head> | |
4 | <meta http-equiv="Content-Type" content="text/html; charset=EUC-JP"/> | |
5 | <title>Test</title> | |
6 | </head> | |
7 | <body> | |
8 | <p>漢字、カタカナ、ひらがなの入ったhtml.</p> | |
9 | </body> | |
10 | </html> |
0 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
1 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
2 | <html xmlns="http://www.w3.org/1999/xhtml"> | |
3 | <head> | |
4 | <meta http-equiv="Content-Type" content="text/html; charset=ISO-2022-JP"/> | |
5 | <title>Test</title> | |
6 | </head> | |
7 | <body> | |
8 | <p>$B4A;z!"%+%?%+%J!"$R$i$,$J$NF~$C$?(Bhtml.</p> | |
9 | </body> | |
10 | </html> |
0 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
1 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
2 | <html xmlns="http://www.w3.org/1999/xhtml"> | |
3 | <head> | |
4 | <title>Test</title> | |
5 | </head> | |
6 | <body> | |
7 | <p>The quick brown fox jumps over the black lazy dog.</p> | |
8 | </body> | |
9 | </html> |
0 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
1 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
2 | <html xmlns="http://www.w3.org/1999/xhtml"> | |
3 | <head> | |
4 | <meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS"/> | |
5 | <title>Test</title> | |
6 | </head> | |
7 | <body> | |
8 | <p>漢字、カタカナ、ひらがなの入ったhtml.</p> | |
9 | </body> | |
10 | </html> |
0 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
1 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
2 | <html xmlns="http://www.w3.org/1999/xhtml"> | |
3 | <head> | |
4 | <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> | |
5 | <title>Test</title> | |
6 | </head> | |
7 | <body> | |
8 | <p>漢字、カタカナ、ひらがなの入ったhtml.</p> | |
9 | </body> | |
10 | </html> |