Commit cfe18145a67399695b271db26bc365fbf14fe936 - libencode-zapcp1252-perl

New upstream version 0.40 Niko Tyni 3 years ago

11 changed file(s) with 258 addition(s) and 143 deletion(s). Raw diff Collapse all Expand all

-3

Build.PL less more

15	15	},
16	16	meta_merge => {
17	17	resources => {
18		homepage => 'http://search.cpan.org/dist/Encode-CP1252/',
19		bugtracker => 'http://github.com/theory/encode-cp1252/issues/',
20		repository => 'http://github.com/theory/encode-cp1252/tree',
	18	homepage => 'https://search.cpan.org/dist/Encode-ZapCP1252/',
	19	bugtracker => 'https://github.com/theory/encode-zapcp1252/issues/',
	20	repository => 'https://github.com/theory/encode-zapcp1252',
21	21	}
22	22	},
23	23	);

+14

-6

Changes less more

0	0	Revision history for Perl extension Encode::ZapCP1252
	1
	2	0.40 2020-02-04T15:30:50Z
	3	- Added link to Encoding::FixLatin. Suggested by Jonas Smedegaard.
	4	- Removed Pod tests from the distribution.
	5	- Fixed an issue discovered on Perl 5.31 that incorrectly resulted in
	6	the creation of malformed UTF-8 when fixing unicode strings. Thanks to
	7	Karl Williamson for the continuing improvement of Unicode support in
	8	Perl and for the fix to this module.
1	9
2	10	0.33 2011-11-23T05:19:36Z
3	11	- Require Test::Pod 1.41 to support `L<text\|url>` syntax in the Pod.

15	23	0.30 2010-06-12T18:05:38
16	24	- The conversion functions now ignore `undef` arguments and just return
17	25	without doing anything.
18		- Strings are no longer modifed in-place unless the conversion
	26	- Strings are no longer modified in-place unless the conversion
19	27	subroutines are called in a void context.
20		- The conversion functions may optionally be called with no arugment
	28	- The conversion functions may optionally be called with no argument
21	29	when run in Perl 5.10 or higher, in which case they will instead act
22	30	on `$_`.
23	31
24	32	0.20 2010-06-12T00:39:35
25		- Added `local` to examples of changing the maping tables.
26		- When the Encode module is insatlled, zapping and fixing CP1252
	33	- Added `local` to examples of changing the mapping tables.
	34	- When the Encode module is installed, zapping and fixing CP1252
27	35	gremlins now works in decoded strings, too.
28	36	- For convenience, the functions now return the strings they've
29	37	modified.
30	38	- Shipping with a traditional `Makefile.PL` rather than one that
31	39	passes through to Module::Build.
32		- Moved repository to [GitHub](http://github.com/theory/encode-zapcp1252).
	40	- Moved repository to [GitHub](https://github.com/theory/encode-zapcp1252).
33	41
34	42	0.12 2008-06-23T17:48:04
35	43	- Fixed pasto in the "Support" section of the docs.
36		- Fixed a typo in the "Synopsis" section of the docs, thaks to David
	44	- Fixed a typo in the "Synopsis" section of the docs, thanks to David
37	45	Beaudet.
38	46	- Fixed the 5.6.2 requirement to be properly detected in Perl 5.5.
39	47	Thanks to Slaven Rezic for the report.

-5

MANIFEST less more

0	0	Build.PL
1	1	Changes
2	2	lib/Encode/ZapCP1252.pm
	3	Makefile.PL
3	4	MANIFEST This list of files
4		README
	5	META.json
	6	META.yml
	7	README.md
5	8	t/base.t
6	9	t/decoded.t
7	10	t/perl-510.t
8		t/pod.t
9		Makefile.PL
10		META.yml
11		META.json

-8

META.json less more

3	3	"David E. Wheeler <david@justatheory.com>"
4	4	],
5	5	"dynamic_config" : 1,
6		"generated_by" : "Module::Build version 0.38, CPAN::Meta::Converter version 2.112150",
	6	"generated_by" : "Module::Build version 0.4229",
7	7	"license" : [
8	8	"perl_5"
9	9	],
10	10	"meta-spec" : {
11	11	"url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
12		"version" : "2"
	12	"version" : 2
13	13	},
14	14	"name" : "Encode-ZapCP1252",
15	15	"prereqs" : {

26	26	},
27	27	"runtime" : {
28	28	"recommends" : {
29		"Encode" : 0,
	29	"Encode" : "0",
30	30	"Test::Pod" : "1.41"
31	31	},
32	32	"requires" : {

37	37	"provides" : {
38	38	"Encode::ZapCP1252" : {
39	39	"file" : "lib/Encode/ZapCP1252.pm",
40		"version" : "0.33"
	40	"version" : "0.40"
41	41	}
42	42	},
43	43	"release_status" : "stable",
44	44	"resources" : {
45	45	"bugtracker" : {
46		"web" : "http://github.com/theory/encode-cp1252/issues/"
	46	"web" : "https://github.com/theory/encode-zapcp1252/issues/"
47	47	},
48		"homepage" : "http://search.cpan.org/dist/Encode-CP1252/",
	48	"homepage" : "https://search.cpan.org/dist/Encode-ZapCP1252/",
49	49	"license" : [
50	50	"http://dev.perl.org/licenses/"
51	51	],
52	52	"repository" : {
53		"url" : "http://github.com/theory/encode-cp1252/tree"
	53	"url" : "https://github.com/theory/encode-zapcp1252"
54	54	}
55	55	},
56		"version" : "0.33"
	56	"version" : "0.40",
	57	"x_serialization_backend" : "JSON::PP version 4.02"
57	58	}

+14

-13

META.yml less more

2	2	author:
3	3	- 'David E. Wheeler <david@justatheory.com>'
4	4	build_requires:
5		Module::Build: 0.36
6		Test::More: 0.17
	5	Module::Build: '0.36'
	6	Test::More: '0.17'
7	7	configure_requires:
8		Module::Build: 0.36
	8	Module::Build: '0.36'
9	9	dynamic_config: 1
10		generated_by: 'Module::Build version 0.38, CPAN::Meta::Converter version 2.112150'
	10	generated_by: 'Module::Build version 0.4229, CPAN::Meta::Converter version 2.150010'
11	11	license: perl
12	12	meta-spec:
13	13	url: http://module-build.sourceforge.net/META-spec-v1.4.html
14		version: 1.4
	14	version: '1.4'
15	15	name: Encode-ZapCP1252
16	16	provides:
17	17	Encode::ZapCP1252:
18	18	file: lib/Encode/ZapCP1252.pm
19		version: 0.33
	19	version: '0.40'
20	20	recommends:
21		Encode: 0
22		Test::Pod: 1.41
	21	Encode: '0'
	22	Test::Pod: '1.41'
23	23	requires:
24		perl: 5.006002
	24	perl: '5.006002'
25	25	resources:
26		bugtracker: http://github.com/theory/encode-cp1252/issues/
27		homepage: http://search.cpan.org/dist/Encode-CP1252/
	26	bugtracker: https://github.com/theory/encode-zapcp1252/issues/
	27	homepage: https://search.cpan.org/dist/Encode-ZapCP1252/
28	28	license: http://dev.perl.org/licenses/
29		repository: http://github.com/theory/encode-cp1252/tree
30		version: 0.33
	29	repository: https://github.com/theory/encode-zapcp1252
	30	version: '0.40'
	31	x_serialization_backend: 'CPAN::Meta::YAML version 0.018'

-4

Makefile.PL less more

0		# Note: this file was auto-generated by Module::Build::Compat version 0.3800
	0	# Note: this file was auto-generated by Module::Build::Compat version 0.4229
1	1	require 5.006002;
2	2	use ExtUtils::MakeMaker;
3	3	WriteMakefile
4	4	(
5		'NAME' => 'Encode::ZapCP1252',
	5	'PL_FILES' => {},
	6	'INSTALLDIRS' => 'site',
6	7	'VERSION_FROM' => 'lib/Encode/ZapCP1252.pm',
7	8	'PREREQ_PM' => {
8	9	'Module::Build' => '0.36',
9	10	'Test::More' => '0.17'
10	11	},
11		'INSTALLDIRS' => 'site',
12	12	'EXE_FILES' => [],
13		'PL_FILES' => {}
	13	'NAME' => 'Encode::ZapCP1252'
14	14	)
15	15	;

-44

~~README~~ less more

0		Encode/CP1252 version 0.33
1		==========================
2
3		Have you ever been processing a Web form submit, assuming that the incoming
4		text was encoded in ISO-8859-1 (Latin-1), only to end up with a bunch of junk
5		because someone pasted in content from Microsoft Word? Well, this is because
6		Microsoft uses a superset of the Latin-1 encoding called "Windows Western" or
7		"CP1252". So mostly things will come out right, but a few things--like curly
8		quotes, m-dashes, ellipses, and the like--will not. The differences are
9		well-known; you see a nice chart at documenting the differences on
10		[Wikipedia](http://en.wikipedia.org/wiki/Windows-1252).
11
12		Of course, that won't really help you. So this library's module,
13		Encode::ZapCP1252, provides subroutines for removing Windows Western Gremlins
14		from strings, turning them into their appropriate UTF-8 or ASCII
15		approximations:
16
17		my $clean_latin1 = zap_cp1252 $latin1_text;
18		my $fixed_utf8 = fix_cp1252 $utf8_text;
19
20		Installation
21		------------
22
23		To install this module, type the following:
24
25		perl Build.PL
26		./Build
27		./Build test
28		./Build install
29
30		Or, if you don't have Module::Build installed, type the following:
31
32		perl Makefile.PL
33		make
34		make test
35		make install
36
37		Copyright and Licence
38		---------------------
39
40		Copyright (c) 2005-2010 David E. Wheeler. Some Rights Reserved.
41
42		This module is free software; you can redistribute it and/or modify it under
43		the same terms as Perl itself.

+47

-0

README.md less more

	0	Encode/CP1252 version 0.40
	1	==========================
	2
	3	[![CPAN version](https://badge.fury.io/pl/Encode-CP1252.svg)](https://badge.fury.io/pl/Encode-CP1252)
	4	[![Build Status](https://github.com/theory/encode-zapcp1252/workflows/CI/badge.svg)](https://github.com/theory/encode-zapcp1252/actions/)
	5
	6	Have you ever been processing a Web form submit, assuming that the incoming
	7	text was encoded in ISO-8859-1 (Latin-1), only to end up with a bunch of junk
	8	because someone pasted in content from Microsoft Word? Well, this is because
	9	Microsoft uses a superset of the Latin-1 encoding called "Windows Western" or
	10	"CP1252". So mostly things will come out right, but a few things--like curly
	11	quotes, m-dashes, ellipses, and the like--will not. The differences are
	12	well-known; you can see a nice chart documenting the differences on
	13	[Wikipedia](https://en.wikipedia.org/wiki/Windows-1252).
	14
	15	Of course, that won't really help you. So this library's module,
	16	Encode::ZapCP1252, provides subroutines for removing Windows Western Gremlins
	17	from strings, turning them into their appropriate UTF-8 or ASCII
	18	approximations:
	19
	20	my $clean_latin1 = zap_cp1252 $latin1_text;
	21	my $fixed_utf8 = fix_cp1252 $utf8_text;
	22
	23	Installation
	24	------------
	25
	26	To install this module, type the following:
	27
	28	perl Build.PL
	29	./Build
	30	./Build test
	31	./Build install
	32
	33	Or, if you don't have Module::Build installed, type the following:
	34
	35	perl Makefile.PL
	36	make
	37	make test
	38	make install
	39
	40	Copyright and Licence
	41	---------------------
	42
	43	Copyright (c) 2005-2020 David E. Wheeler. Some Rights Reserved.
	44
	45	This module is free software; you can redistribute it and/or modify it under
	46	the same terms as Perl itself.

+150

-49

lib/Encode/ZapCP1252.pm less more

4	4	use vars qw($VERSION @ISA @EXPORT);
5	5	use 5.006_002;
6	6
7		$VERSION = '0.33';
	7	$VERSION = '0.40';
8	8	@ISA = qw(Exporter);
9	9	@EXPORT = qw(zap_cp1252 fix_cp1252);
10	10	use constant PERL588 => $] >= 5.008_008;
11		require Encode if PERL588;
	11	use Encode ();
12	12
13	13	our %ascii_for = (
14		# http://en.wikipedia.org/wiki/Windows-1252
	14	# https://en.wikipedia.org/wiki/Windows-1252
15	15	"\x80" => 'e', # EURO SIGN
16	16	"\x82" => ',', # SINGLE LOW-9 QUOTATION MARK
17	17	"\x83" => 'f', # LATIN SMALL LETTER F WITH HOOK

42	42	);
43	43
44	44	our %utf8_for = (
45		# http://en.wikipedia.org/wiki/Windows-1252
	45	# https://en.wikipedia.org/wiki/Windows-1252
46	46	"\x80" => '€', # EURO SIGN
47	47	"\x82" => ',', # SINGLE LOW-9 QUOTATION MARK
48	48	"\x83" => 'ƒ', # LATIN SMALL LETTER F WITH HOOK

72	72	"\x9f" => 'Ÿ', # LATIN CAPITAL LETTER Y WITH DIAERESIS
73	73	);
74	74
	75	my @utf8_skip = (
	76	# This translates a utf-8-encoded byte into how many bytes the full utf8
	77	# character occupies. Illegal start bytes have a negative count.
	78
	79	# UTF-8 is a variable-length encoding. The 128 ASCII characters were very
	80	# deliberately set to be themselves, so UTF-8 would be backwards compatible
	81	# with 7-bit applications. Every other character has 2 - 13 bytes comprising
	82	# it.
	83	#
	84	# If the first bit of the first byte in a character is 0, it is one of those
	85	# 128 ASCII characters with length 1.
	86
	87	# Otherwise, the first bit is 1, and if the second bit is also one, this byte
	88	# starts the sequence of bytes that represent the character. The bytes C0-FF
	89	# have the characteristic that the first two bits are both one. The number of
	90	# bytes that form a character corresponds to the number of consecutive leading
	91	# bits that are all one in the start byte. In the case of FE, the first 7
	92	# bits are one, so the number of bytes in the character it represents is 7.
	93	# FF is a special case, and Perl has arbitrarily set it to 13 instead of the
	94	# expected 8.
	95	#
	96	# The remaining bytes begin with '10', from 80..BF. They are called
	97	# continuation bytes, and a UTF-8 character is comprised of a start byte
	98	# indicating 'n' bytes total in it, then 'n-1' of these continuation bytes.
	99	# What the character is that each sequence represents is derived by shifting
	100	# and adding the other bits in the bytes. (C0 and C1 aren't actually legal
	101	# start bytes for security reasons that need not concern us here, hence are
	102	# marked as negative in the table below.)
	103
	104	# 0 1 2 3 4 5 6 7 8 9 A B C D E F
	105	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 0
	106	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 1
	107	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 2
	108	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 3
	109	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 4
	110	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 5
	111	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 6
	112	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 7
	113	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, # 8
	114	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, # 9
	115	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, # A
	116	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, # B
	117	-1,-1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # C
	118	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # D
	119	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, # E
	120	4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7,13, # F
	121	);
	122
75	123	BEGIN {
76	124	my $proto = $] >= 5.010000 ? '_' : '$';
77	125	eval "sub zap_cp1252($proto) { unshift \@_, \\%ascii_for; &_tweakit; }";
78	126	eval "sub fix_cp1252($proto) { unshift \@_, \\%utf8_for; &_tweakit; }";
79	127	}
80	128
	129	# These are the bytes that CP1252 redefines
	130	my $cp1252_re = qr/[\x80\x82-\x8c\x8e\x91-\x9c\x9e\x9f]/;
	131
81	132	sub _tweakit {
82	133	my $table = shift;
83	134	return unless defined $_[0];
84	135	local $_[0] = $_[0] if defined wantarray;
85		if (PERL588 && Encode::is_utf8($_[0])) {
86		_tweak_decoded($table, $_[0]);
87		} else {
88		$_[0] =~ s{([\x80-\x9f])}{$table->{$1} \|\| $1}emxsg;
	136	my $is_utf8 = PERL588 && Encode::is_utf8($_[0]);
	137	my $valid_utf8 = $is_utf8 && utf8::valid($_[0]);
	138	if (!$is_utf8) {
	139
	140	# Here is non-UTF-8. Change the 1252 characters to their UTF-8
	141	# counterparts. These bytes are very rarely used in real world
	142	# applications, so their presence likely indicates that CP1252 was
	143	# meant.
	144	$_[0] =~ s/($cp1252_re)/$table->{$1}/gems;
	145	} elsif ($valid_utf8) {
	146
	147	# Here is well-formed Perl extended UTF-8 and has the UTF-8 flag on
	148	# and the string is held as bytes. Change the 1252 characters to their
	149	# Unicode counterparts.
	150	$_[0] =~ s/($cp1252_re)/Encode::decode_utf8($table->{$1})/gems;
	151	} else { # Invalid UTF-8. Look for single-byte CP1252 gremlins
	152
	153	# Turn off the UTF-8 flag so that we can go through the string
	154	# byte-by-byte.
	155	Encode::_utf8_off($_[0]);
	156
	157	my $i = 0;
	158	my $length = length $_[0];
	159	my $fixed = ""; # The input after being fixed up by this loop
	160	while ($i < $length) {
	161
	162	# Each time through the loop, we should here be ready to look at a
	163	# new character, and its 0th byte is called a 'start byte'
	164	my $start_byte = substr($_[0], $i, 1);
	165	my $skip = $utf8_skip[ord $start_byte];
	166
	167	# The table is set up so that legal UTF-8 start bytes have a
	168	# positive byte length. Simply add all the bytes in the character
	169	# to the output, and go on to handle the next character in the
	170	# next loop iteration.
	171	if ($skip > 0) {
	172	$fixed .= substr($_[0], $i, $skip);
	173	$i += $skip;
	174	next;
	175	}
	176
	177	# Here we have a byte that isn't a start byte in a position that
	178	# should oughta be a start byte. The whole point of this loop is
	179	# to find such bytes that are CP1252 ones and which were
	180	# incorrectly inserted by the upstream process into an otherwise
	181	# valid UTF-8 string. So, if we have such a one, change it into
	182	# its corresponding correct character.
	183	if ($start_byte =~ s/($cp1252_re)/$table->{$1}/ems) {
	184
	185	# The correct character may be UTF-8 bytes. We treat them as
	186	# just a sequence of non-UTF-8 bytes, because that's what
	187	# $fixed has in it so far. After everything is consistently
	188	# added, we turn the UTF-8 flag back on before returning at
	189	# the end.
	190	Encode::_utf8_off($start_byte);
	191	$fixed .= $start_byte;
	192	$i++;
	193	next;
	194	}
	195
	196	# Here the byte isn't a CP1252 one.
	197	die "Unexpected continuation byte: %02x", ord $start_byte;
	198	}
	199
	200	# $fixed now has everything properly in it, but set to return it in
	201	# $_[0], marked as UTF-8.
	202	$_[0] = $fixed;
	203	Encode::_utf8_on($_[0]);
89	204	}
90	205	return $_[0] if defined wantarray;
91		}
92
93		sub _tweak_decoded {
94		my $table = shift;
95		local $@;
96		# First, try to replace in the decoded string.
97		eval {
98		$_[0] =~ s{([\x80-\x9f])}{
99		$table->{$1} ? Encode::decode('UTF-8', $table->{$1}) : $1
100		}emxsg
101		};
102		if (my $err = $@) {
103		# If we got a "Malformed UTF-8 character" error, then someone
104		# likely turned on the utf8 flag without decoding. So turn it off.
105		# and try again.
106		die if $err !~ /Malformed/;
107		Encode::_utf8_off($_[0]);
108		$_[0] =~ s/([\x80-\x9f])/$table->{$1} \|\| $1/emxsg;
109		Encode::_utf8_on($_[0]);
110		}
111	206	}
112	207
113	208	1;

141	236	encoding is Latin-1, mostly things will come out right, but a few things--like
142	237	curly quotes, m-dashes, ellipses, and the like--may not. The differences are
143	238	well-known; you can see a nice chart documenting the differences on
144		L<Wikipedia\|http://en.wikipedia.org/wiki/Windows-1252>.
	239	L<Wikipedia\|https://en.wikipedia.org/wiki/Windows-1252>.
145	240
146	241	Of course, that won't really help you. What will help you is to quit using
147	242	Latin-1 and switch to UTF-8. Then you can just convert from CP1252 to UTF-8

160	255	gremlins mixed in with properly encoded characters. I've seen examples of just
161	256	this sort of thing when processing GMail messages and attempting to insert
162	257	them into a UTF-8 database, as well as in some feeds processed by, say
163		L<Yahoo! Pipes\|http://pipes.yahoo.com>. Doesn't work so well. For such cases,
164		there's C<fix_cp1252>, which converts those CP1252 gremlins into their UTF-8
165		equivalents.
	258	Yahoo! Pipes. Doesn't work so well. For such cases, there's C<fix_cp1252>,
	259	which converts those CP1252 gremlins into their UTF-8 equivalents.
166	260
167	261	=head1 Usage
168	262

186	280
187	281	In this case, even constant values can be processed. Either way, C<undef>s
188	282	will be ignored.
	283
	284	In Perl 5.10 and higher, the functions may optionally be called with no
	285	arguments, in which case C<$_> will be converted, instead:
	286
	287	zap_cp1252; # Modify $_ in-place.
	288	fix_cp1252; # Modify $_ in-place.
	289	my $zapped = zap_cp1252; # Copy $_ and return zapped
	290	my $fixed = fix_cp1252; # Copy $_ and return fixed
189	291
190	292	In Perl 5.8.8 and higher, the conversion will work even when the string is
191	293	decoded to Perl's internal form (usually via C<decode 'ISO-8859-1', $text>) or

196	298	removing those CP1252 gremlins no matter what kind of processing has already
197	299	been executed on the string.
198	300
199		In Perl 5.10 and higher, the functions may optionally be called with no
200		arguments, in which case C<$_> will be converted, instead:
201
202		zap_cp1252; # Modify $_ in-place.
203		fix_cp1252; # Modify $_ in-place.
204		my $zapped = zap_cp1252; # Copy $_ and return zapped
205		my $fixed = zap_cp1252; # Copy $_ and return fixed
	301	That said, although C<fix_cp1252()> takes a conservative approach to replacing
	302	text in Unicode strings, it should be used as a very last option. Really,
	303	avoid that situation if you can.
206	304
207	305	=head1 Conversion Table
208	306

250	348
251	349	local $Encode::ZapCP1252::ascii_for{"\x80"} = 'E';
252	350
253		Or if, for some bizarre reason, you wanted the UTF-8 equivalent for a bullet
254		converted by C<fix_cp1252()> to really be an asterisk (why would you? Just use
255		C<zap_cp1252> for that!), you can do this:
256
257		local $Encode::ZapCP1252::utf8_for{"\x95"} = '*';
	351	Or if, for some reason, you wanted the UTF-8 equivalent for a bullet
	352	converted by C<fix_cp1252()> to be a black square, you can assign the
	353	bytes (never a Unicode string) like so:
	354
	355	local $Encode::ZapCP1252::utf8_for{"\x95"} = Encode::encode_utf8('■');
258	356
259	357	Just remember, without C<local> this would be a global change. In that case,
260	358	be careful if your code zaps CP1252 elsewhere. Of course, it shouldn't really

268	366
269	367	=item L<Encode>
270	368
271		=item L<Wikipedia: Windows-1252\|http://en.wikipedia.org/wiki/Windows-1252>
	369	=item L<Encoding::FixLatin>
	370
	371	=item L<Wikipedia: Windows-1252\|https://en.wikipedia.org/wiki/Windows-1252>
272	372
273	373	=back
274	374
275	375	=head1 Support
276	376
277	377	This module is stored in an open L<GitHub
278		repository\|http://github.com/theory/encode-cp1252/tree/>. Feel free to fork
	378	repository\|https://github.com/theory/encode-zapcp1252/>. Feel free to fork
279	379	and contribute!
280	380
281	381	Please file bug reports via L<GitHub
282		Issues\|http://github.com/theory/encode-cp1252/issues/> or by sending mail to
	382	Issues\|https://github.com/theory/encode-zapcp1252/issues/> or by sending mail to
283	383	L<bug-Encode-CP1252@rt.cpan.org\|mailto:bug-Encode-CP1252@rt.cpan.org>.
284	384
285	385	=head1 Author

289	389	=head1 Acknowledgments
290	390
291	391	My thanks to Sean Burke for sending me his original method for converting
292		CP1252 gremlins to more-or-less appropriate ASCII characters.
	392	CP1252 gremlins to more-or-less appropriate ASCII characters, and to Karl
	393	Williamson for more correct handling of Unicode strings.
293	394
294	395	=head1 Copyright and License
295	396
296		Copyright (c) 2005-2010 David E. Wheeler. Some Rights Reserved.
	397	Copyright (c) 2005-2020 David E. Wheeler. Some Rights Reserved.
297	398
298	399	This module is free software; you can redistribute it and/or modify it under the
299	400	same terms as Perl itself.

+13

-2

t/decoded.t less more

5	5	BEGIN {
6	6	plan skip_all => 'These tests require Perl 5.8.8 or higher'
7	7	unless $] >= 5.008_008;
8		plan tests => 6;
	8	plan tests => 10;
9	9	}
10	10
11	11	BEGIN { use_ok 'Encode::ZapCP1252' or die; }

14	14	my $ascii = q{e , f ,, ... + ++ ^ % S < OE Z ' ' " " * - -- ~ (tm) s > oe z Y};
15	15	my $utf8 = q{€ , ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ};
16	16
17		# Test conversion of decoded from ISO-8859-1.
	17	# Test conversion of text decoded from ISO-8859-1.
18	18	my $fix_me = Encode::decode(
19	19	'ISO-8859-1',
20	20	join ' ', map { chr } 0x80, 0x82 .. 0x8c, 0x8e, 0x91 .. 0x9c, 0x9e, 0x9f

44	44	is $fix_me, $ascii, 'Convert utf8-bit-flipped to ascii';
45	45
46	46	# Test conversion to decoded with modified table.
	47	my $euro = $Encode::ZapCP1252::utf8_for{"\x80"};
47	48	$Encode::ZapCP1252::utf8_for{"\x80"} = 'E';
48	49	$utf8 =~ s/€/E/;
49	50

55	56	fix_cp1252 $fix_me;
56	57	is $fix_me, $utf8, 'Convert decoded from Latin-1 with modified table';
57	58
	59	# Test it with the valid use of one of the gremlins (π is [0xcf,0x80]) in UTF-8.
	60	is fix_cp1252 'π', 'π', 'Should not convert valid use of 0x80';
	61	is zap_cp1252 'π', 'π', 'Should not zap valid use of 0x80';
58	62
	63	# But it should convert it if it's not UTF-8.
	64	my $utf8_euro = Encode::encode_utf8($euro);
	65	$Encode::ZapCP1252::utf8_for{"\x80"} = $utf8_euro;
	66	is fix_cp1252 "\xCF\x80", "\xCF" . $utf8_euro,
	67	'Should convert 0x80 when not parsing UTF-8';
	68	is zap_cp1252 "\xCF\x80", qq{\xCF$Encode::ZapCP1252::ascii_for{"\x80"}},
	69	'Should convert 0x80 to ASCII when not parsing UTF-8';

-9

~~t/pod.t~~ less more

0		#!perl -w
1
2		use strict;
3		use Test::More;
4		eval 'use Test::Pod 1.41';
5		plan skip_all => 'Test::Pod 1.41 required for testing POD' if $@;
6		eval 'use Encode';
7		plan skip_all => 'Encode 1.20 required for testing POD because it has UTF-8 characters' if $@;
8		all_pod_files_ok();