Commit a8fc51cb80d24e7e522985a8e6c63e59fe35c3c9 - liblingua-en-sentence-perl

Import upstream version 0.33a+git20220705.1.d54294a+ds Debian Janitor 1 year, 7 months ago

3 changed file(s) with 104 addition(s) and 85 deletion(s). Raw diff Collapse all Expand all

+51

-51

META.json less more

0		{
1		"abstract" : "Split text into sentences",
2		"author" : [
3		"Shlomo Yona, Kim Ryan <kimryan at cpan org>"
4		],
5		"dynamic_config" : 1,
6		"generated_by" : "Module::Build version 0.4229",
7		"license" : [
8		"perl_5"
9		],
10		"meta-spec" : {
11		"url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
12		"version" : 2
13		},
14		"name" : "Lingua-EN-Sentence",
15		"prereqs" : {
16		"build" : {
17		"requires" : {
18		"Test::More" : "0.94"
19		}
20		},
21		"configure" : {
22		"requires" : {
23		"Module::Build" : "0.38"
24		}
25		},
26		"runtime" : {
27		"requires" : {
28		"perl" : "v5.10.0",
29		"warnings" : "1.06"
30		}
31		}
32		},
33		"provides" : {
34		"Lingua::EN::Sentence" : {
35		"file" : "lib/Lingua/EN/Sentence.pm",
36		"version" : "0.33"
37		}
38		},
39		"release_status" : "stable",
40		"resources" : {
41		"license" : [
42		"http://dev.perl.org/licenses/"
43		],
44		"repository" : {
45		"url" : "https://github.com/kimryan/Lingua-EN-Sentence"
46		}
47		},
48		"version" : "0.33",
49		"x_serialization_backend" : "JSON::PP version 4.04"
50		}
	0	{
	1	"abstract" : "Split text into sentences",
	2	"author" : [
	3	"Shlomo Yona, Kim Ryan <kimryan at cpan org>"
	4	],
	5	"dynamic_config" : 1,
	6	"generated_by" : "Module::Build version 0.4231",
	7	"license" : [
	8	"perl_5"
	9	],
	10	"meta-spec" : {
	11	"url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
	12	"version" : 2
	13	},
	14	"name" : "Lingua-EN-Sentence",
	15	"prereqs" : {
	16	"build" : {
	17	"requires" : {
	18	"Test::More" : "0.94"
	19	}
	20	},
	21	"configure" : {
	22	"requires" : {
	23	"Module::Build" : "0.38"
	24	}
	25	},
	26	"runtime" : {
	27	"requires" : {
	28	"perl" : "v5.10.0",
	29	"warnings" : "1.06"
	30	}
	31	}
	32	},
	33	"provides" : {
	34	"Lingua::EN::Sentence" : {
	35	"file" : "lib/Lingua/EN/Sentence.pm",
	36	"version" : "0.33"
	37	}
	38	},
	39	"release_status" : "stable",
	40	"resources" : {
	41	"license" : [
	42	"http://dev.perl.org/licenses/"
	43	],
	44	"repository" : {
	45	"url" : "https://github.com/kimryan/Lingua-EN-Sentence"
	46	}
	47	},
	48	"version" : "0.33",
	49	"x_serialization_backend" : "JSON::PP version 4.06"
	50	}

+27

-27

META.yml less more

0		---
1		abstract: 'Split text into sentences'
2		author:
3		- 'Shlomo Yona, Kim Ryan <kimryan at cpan org>'
4		build_requires:
5		Test::More: '0.94'
6		configure_requires:
7		Module::Build: '0.38'
8		dynamic_config: 1
9		generated_by: 'Module::Build version 0.4229, CPAN::Meta::Converter version 2.150010'
10		license: perl
11		meta-spec:
12		url: http://module-build.sourceforge.net/META-spec-v1.4.html
13		version: '1.4'
14		name: Lingua-EN-Sentence
15		provides:
16		Lingua::EN::Sentence:
17		file: lib/Lingua/EN/Sentence.pm
18		version: '0.33'
19		requires:
20		perl: v5.10.0
21		warnings: '1.06'
22		resources:
23		license: http://dev.perl.org/licenses/
24		repository: https://github.com/kimryan/Lingua-EN-Sentence
25		version: '0.33'
26		x_serialization_backend: 'CPAN::Meta::YAML version 0.018'
	0	---
	1	abstract: 'Split text into sentences'
	2	author:
	3	- 'Shlomo Yona, Kim Ryan <kimryan at cpan org>'
	4	build_requires:
	5	Test::More: '0.94'
	6	configure_requires:
	7	Module::Build: '0.38'
	8	dynamic_config: 1
	9	generated_by: 'Module::Build version 0.4231, CPAN::Meta::Converter version 2.150010'
	10	license: perl
	11	meta-spec:
	12	url: http://module-build.sourceforge.net/META-spec-v1.4.html
	13	version: '1.4'
	14	name: Lingua-EN-Sentence
	15	provides:
	16	Lingua::EN::Sentence:
	17	file: lib/Lingua/EN/Sentence.pm
	18	version: '0.33'
	19	requires:
	20	perl: v5.10.0
	21	warnings: '1.06'
	22	resources:
	23	license: http://dev.perl.org/licenses/
	24	repository: https://github.com/kimryan/Lingua-EN-Sentence
	25	version: '0.33'
	26	x_serialization_backend: 'CPAN::Meta::YAML version 0.018'

+26

-7

README less more

5	5	SYNOPSIS
6	6
7	7	use Lingua::EN::Sentence qw( get_sentences add_acronyms );
	8
	9	add_acronyms('lt','gen'); ## adding support for 'Lt. Gen.'
	10	my $text = q{
	11	A sentence usually ends with a dot, exclamation or question mark optionally followed by a space!
	12	A string followed by 2 carriage returns denotes a sentence, even though it doesn't end in a dot
	13
	14	Dots after single letters such as U.S.A. or in numbers like -12.34 will not cause a split
	15	as well as common abbreviations such as Dr. I. Smith, Ms. A.B. Jones, Apr. Calif. Esq.
	16	and (some text) ellipsis such as ... or . . are ignored.
	17	Some valid cases cannot be detected, such as the answer is X. It cannot easily be
	18	differentiated from the single letter-dot sequence to abbreviate a person's given name.
	19	Numbered points within a sentence will not cause a split 1. Like this one.
	20	See the code for all the rules that apply.
	21	This string has 7 sentences.
	22	};
	23
	24	my $sentences=get_sentences($text); # Get the sentences.
	25	foreach my $sent (@$sentences)
	26	{
	27	$i++;
	28	print("SENTENCE $i:$sent\n");
	29	}
8	30
9		add_acronyms('lt','gen'); ## adding support for 'Lt. Gen.'
10		my $sentences=get_sentences($text); ## Get the sentences.
11		foreach my $sentence (@$sentences) {
12		## do something with $sentence
13		}
14
	31
15	32	DESCRIPTION
16	33
17		The Lingua::EN::Sentence module contains the function get_sentences, which
	34	The C<Lingua::EN::Sentence> module contains the function get_sentences, which
18	35	splits text into its constituent sentences, based on a regular expression and a
19	36	list of abbreviations (built in and given).
20	37

22	39	segmentations. But some of them are already integrated into this code and are
23	40	being taken care of. Still, if you see that there are words causing the
24	41	get_sentences function to fail, you can add those to the module, so it notices them.
	42	Note that abbreviations are case sensitive, so 'Mrs.' is recognised but not 'mrs.'
	43
25	44
26	45
27	46	INSTALLATION