Codebase list liblingua-en-sentence-perl / a8fc51c
Import upstream version 0.33a+git20220705.1.d54294a+ds Debian Janitor 1 year, 7 months ago
3 changed file(s) with 104 addition(s) and 85 deletion(s). Raw diff Collapse all Expand all
0 {
1 "abstract" : "Split text into sentences",
2 "author" : [
3 "Shlomo Yona, Kim Ryan <kimryan at cpan org>"
4 ],
5 "dynamic_config" : 1,
6 "generated_by" : "Module::Build version 0.4229",
7 "license" : [
8 "perl_5"
9 ],
10 "meta-spec" : {
11 "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
12 "version" : 2
13 },
14 "name" : "Lingua-EN-Sentence",
15 "prereqs" : {
16 "build" : {
17 "requires" : {
18 "Test::More" : "0.94"
19 }
20 },
21 "configure" : {
22 "requires" : {
23 "Module::Build" : "0.38"
24 }
25 },
26 "runtime" : {
27 "requires" : {
28 "perl" : "v5.10.0",
29 "warnings" : "1.06"
30 }
31 }
32 },
33 "provides" : {
34 "Lingua::EN::Sentence" : {
35 "file" : "lib/Lingua/EN/Sentence.pm",
36 "version" : "0.33"
37 }
38 },
39 "release_status" : "stable",
40 "resources" : {
41 "license" : [
42 "http://dev.perl.org/licenses/"
43 ],
44 "repository" : {
45 "url" : "https://github.com/kimryan/Lingua-EN-Sentence"
46 }
47 },
48 "version" : "0.33",
49 "x_serialization_backend" : "JSON::PP version 4.04"
50 }
0 {
1 "abstract" : "Split text into sentences",
2 "author" : [
3 "Shlomo Yona, Kim Ryan <kimryan at cpan org>"
4 ],
5 "dynamic_config" : 1,
6 "generated_by" : "Module::Build version 0.4231",
7 "license" : [
8 "perl_5"
9 ],
10 "meta-spec" : {
11 "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
12 "version" : 2
13 },
14 "name" : "Lingua-EN-Sentence",
15 "prereqs" : {
16 "build" : {
17 "requires" : {
18 "Test::More" : "0.94"
19 }
20 },
21 "configure" : {
22 "requires" : {
23 "Module::Build" : "0.38"
24 }
25 },
26 "runtime" : {
27 "requires" : {
28 "perl" : "v5.10.0",
29 "warnings" : "1.06"
30 }
31 }
32 },
33 "provides" : {
34 "Lingua::EN::Sentence" : {
35 "file" : "lib/Lingua/EN/Sentence.pm",
36 "version" : "0.33"
37 }
38 },
39 "release_status" : "stable",
40 "resources" : {
41 "license" : [
42 "http://dev.perl.org/licenses/"
43 ],
44 "repository" : {
45 "url" : "https://github.com/kimryan/Lingua-EN-Sentence"
46 }
47 },
48 "version" : "0.33",
49 "x_serialization_backend" : "JSON::PP version 4.06"
50 }
0 ---
1 abstract: 'Split text into sentences'
2 author:
3 - 'Shlomo Yona, Kim Ryan <kimryan at cpan org>'
4 build_requires:
5 Test::More: '0.94'
6 configure_requires:
7 Module::Build: '0.38'
8 dynamic_config: 1
9 generated_by: 'Module::Build version 0.4229, CPAN::Meta::Converter version 2.150010'
10 license: perl
11 meta-spec:
12 url: http://module-build.sourceforge.net/META-spec-v1.4.html
13 version: '1.4'
14 name: Lingua-EN-Sentence
15 provides:
16 Lingua::EN::Sentence:
17 file: lib/Lingua/EN/Sentence.pm
18 version: '0.33'
19 requires:
20 perl: v5.10.0
21 warnings: '1.06'
22 resources:
23 license: http://dev.perl.org/licenses/
24 repository: https://github.com/kimryan/Lingua-EN-Sentence
25 version: '0.33'
26 x_serialization_backend: 'CPAN::Meta::YAML version 0.018'
0 ---
1 abstract: 'Split text into sentences'
2 author:
3 - 'Shlomo Yona, Kim Ryan <kimryan at cpan org>'
4 build_requires:
5 Test::More: '0.94'
6 configure_requires:
7 Module::Build: '0.38'
8 dynamic_config: 1
9 generated_by: 'Module::Build version 0.4231, CPAN::Meta::Converter version 2.150010'
10 license: perl
11 meta-spec:
12 url: http://module-build.sourceforge.net/META-spec-v1.4.html
13 version: '1.4'
14 name: Lingua-EN-Sentence
15 provides:
16 Lingua::EN::Sentence:
17 file: lib/Lingua/EN/Sentence.pm
18 version: '0.33'
19 requires:
20 perl: v5.10.0
21 warnings: '1.06'
22 resources:
23 license: http://dev.perl.org/licenses/
24 repository: https://github.com/kimryan/Lingua-EN-Sentence
25 version: '0.33'
26 x_serialization_backend: 'CPAN::Meta::YAML version 0.018'
55 SYNOPSIS
66
77 use Lingua::EN::Sentence qw( get_sentences add_acronyms );
8
9 add_acronyms('lt','gen'); ## adding support for 'Lt. Gen.'
10 my $text = q{
11 A sentence usually ends with a dot, exclamation or question mark optionally followed by a space!
12 A string followed by 2 carriage returns denotes a sentence, even though it doesn't end in a dot
13
14 Dots after single letters such as U.S.A. or in numbers like -12.34 will not cause a split
15 as well as common abbreviations such as Dr. I. Smith, Ms. A.B. Jones, Apr. Calif. Esq.
16 and (some text) ellipsis such as ... or . . are ignored.
17 Some valid cases cannot be detected, such as the answer is X. It cannot easily be
18 differentiated from the single letter-dot sequence to abbreviate a person's given name.
19 Numbered points within a sentence will not cause a split 1. Like this one.
20 See the code for all the rules that apply.
21 This string has 7 sentences.
22 };
23
24 my $sentences=get_sentences($text); # Get the sentences.
25 foreach my $sent (@$sentences)
26 {
27 $i++;
28 print("SENTENCE $i:$sent\n");
29 }
830
9 add_acronyms('lt','gen'); ## adding support for 'Lt. Gen.'
10 my $sentences=get_sentences($text); ## Get the sentences.
11 foreach my $sentence (@$sentences) {
12 ## do something with $sentence
13 }
14
31
1532 DESCRIPTION
1633
17 The Lingua::EN::Sentence module contains the function get_sentences, which
34 The C<Lingua::EN::Sentence> module contains the function get_sentences, which
1835 splits text into its constituent sentences, based on a regular expression and a
1936 list of abbreviations (built in and given).
2037
2239 segmentations. But some of them are already integrated into this code and are
2340 being taken care of. Still, if you see that there are words causing the
2441 get_sentences function to fail, you can add those to the module, so it notices them.
42 Note that abbreviations are case-sensitive, so 'Mrs.' is recognised but not 'mrs.'
43
2544
2645
2746 INSTALLATION