Import upstream version 0.33a+git20220705.1.d54294a+ds
Debian Janitor
1 year, 7 months ago
0 | { | |
1 | "abstract" : "Split text into sentences", | |
2 | "author" : [ | |
3 | "Shlomo Yona, Kim Ryan <kimryan at cpan org>" | |
4 | ], | |
5 | "dynamic_config" : 1, | |
6 | "generated_by" : "Module::Build version 0.4229", | |
7 | "license" : [ | |
8 | "perl_5" | |
9 | ], | |
10 | "meta-spec" : { | |
11 | "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", | |
12 | "version" : 2 | |
13 | }, | |
14 | "name" : "Lingua-EN-Sentence", | |
15 | "prereqs" : { | |
16 | "build" : { | |
17 | "requires" : { | |
18 | "Test::More" : "0.94" | |
19 | } | |
20 | }, | |
21 | "configure" : { | |
22 | "requires" : { | |
23 | "Module::Build" : "0.38" | |
24 | } | |
25 | }, | |
26 | "runtime" : { | |
27 | "requires" : { | |
28 | "perl" : "v5.10.0", | |
29 | "warnings" : "1.06" | |
30 | } | |
31 | } | |
32 | }, | |
33 | "provides" : { | |
34 | "Lingua::EN::Sentence" : { | |
35 | "file" : "lib/Lingua/EN/Sentence.pm", | |
36 | "version" : "0.33" | |
37 | } | |
38 | }, | |
39 | "release_status" : "stable", | |
40 | "resources" : { | |
41 | "license" : [ | |
42 | "http://dev.perl.org/licenses/" | |
43 | ], | |
44 | "repository" : { | |
45 | "url" : "https://github.com/kimryan/Lingua-EN-Sentence" | |
46 | } | |
47 | }, | |
48 | "version" : "0.33", | |
49 | "x_serialization_backend" : "JSON::PP version 4.04" | |
50 | } | |
0 | { | |
1 | "abstract" : "Split text into sentences", | |
2 | "author" : [ | |
3 | "Shlomo Yona, Kim Ryan <kimryan at cpan org>" | |
4 | ], | |
5 | "dynamic_config" : 1, | |
6 | "generated_by" : "Module::Build version 0.4231", | |
7 | "license" : [ | |
8 | "perl_5" | |
9 | ], | |
10 | "meta-spec" : { | |
11 | "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", | |
12 | "version" : 2 | |
13 | }, | |
14 | "name" : "Lingua-EN-Sentence", | |
15 | "prereqs" : { | |
16 | "build" : { | |
17 | "requires" : { | |
18 | "Test::More" : "0.94" | |
19 | } | |
20 | }, | |
21 | "configure" : { | |
22 | "requires" : { | |
23 | "Module::Build" : "0.38" | |
24 | } | |
25 | }, | |
26 | "runtime" : { | |
27 | "requires" : { | |
28 | "perl" : "v5.10.0", | |
29 | "warnings" : "1.06" | |
30 | } | |
31 | } | |
32 | }, | |
33 | "provides" : { | |
34 | "Lingua::EN::Sentence" : { | |
35 | "file" : "lib/Lingua/EN/Sentence.pm", | |
36 | "version" : "0.33" | |
37 | } | |
38 | }, | |
39 | "release_status" : "stable", | |
40 | "resources" : { | |
41 | "license" : [ | |
42 | "http://dev.perl.org/licenses/" | |
43 | ], | |
44 | "repository" : { | |
45 | "url" : "https://github.com/kimryan/Lingua-EN-Sentence" | |
46 | } | |
47 | }, | |
48 | "version" : "0.33", | |
49 | "x_serialization_backend" : "JSON::PP version 4.06" | |
50 | } |
0 | --- | |
1 | abstract: 'Split text into sentences' | |
2 | author: | |
3 | - 'Shlomo Yona, Kim Ryan <kimryan at cpan org>' | |
4 | build_requires: | |
5 | Test::More: '0.94' | |
6 | configure_requires: | |
7 | Module::Build: '0.38' | |
8 | dynamic_config: 1 | |
9 | generated_by: 'Module::Build version 0.4229, CPAN::Meta::Converter version 2.150010' | |
10 | license: perl | |
11 | meta-spec: | |
12 | url: http://module-build.sourceforge.net/META-spec-v1.4.html | |
13 | version: '1.4' | |
14 | name: Lingua-EN-Sentence | |
15 | provides: | |
16 | Lingua::EN::Sentence: | |
17 | file: lib/Lingua/EN/Sentence.pm | |
18 | version: '0.33' | |
19 | requires: | |
20 | perl: v5.10.0 | |
21 | warnings: '1.06' | |
22 | resources: | |
23 | license: http://dev.perl.org/licenses/ | |
24 | repository: https://github.com/kimryan/Lingua-EN-Sentence | |
25 | version: '0.33' | |
26 | x_serialization_backend: 'CPAN::Meta::YAML version 0.018' | |
0 | --- | |
1 | abstract: 'Split text into sentences' | |
2 | author: | |
3 | - 'Shlomo Yona, Kim Ryan <kimryan at cpan org>' | |
4 | build_requires: | |
5 | Test::More: '0.94' | |
6 | configure_requires: | |
7 | Module::Build: '0.38' | |
8 | dynamic_config: 1 | |
9 | generated_by: 'Module::Build version 0.4231, CPAN::Meta::Converter version 2.150010' | |
10 | license: perl | |
11 | meta-spec: | |
12 | url: http://module-build.sourceforge.net/META-spec-v1.4.html | |
13 | version: '1.4' | |
14 | name: Lingua-EN-Sentence | |
15 | provides: | |
16 | Lingua::EN::Sentence: | |
17 | file: lib/Lingua/EN/Sentence.pm | |
18 | version: '0.33' | |
19 | requires: | |
20 | perl: v5.10.0 | |
21 | warnings: '1.06' | |
22 | resources: | |
23 | license: http://dev.perl.org/licenses/ | |
24 | repository: https://github.com/kimryan/Lingua-EN-Sentence | |
25 | version: '0.33' | |
26 | x_serialization_backend: 'CPAN::Meta::YAML version 0.018' |
5 | 5 | SYNOPSIS |
6 | 6 | |
7 | 7 | use Lingua::EN::Sentence qw( get_sentences add_acronyms ); |
8 | ||
9 | add_acronyms('lt','gen'); ## adding support for 'Lt. Gen.' | |
10 | my $text = q{ | |
11 | A sentence usually ends with a dot, exclamation or question mark optionally followed by a space! | |
12 | A string followed by 2 carriage returns denotes a sentence, even though it doesn't end in a dot | |
13 | ||
14 | Dots after single letters such as U.S.A. or in numbers like -12.34 will not cause a split | |
15 | as well as common abbreviations such as Dr. I. Smith, Ms. A.B. Jones, Apr. Calif. Esq. | |
16 | and (some text) ellipsis such as ... or . . are ignored. | |
17 | Some valid cases cannot be detected, such as the answer is X. It cannot easily be | |
18 | differentiated from the single letter-dot sequence to abbreviate a person's given name. | |
19 | Numbered points within a sentence will not cause a split 1. Like this one. | |
20 | See the code for all the rules that apply. | |
21 | This string has 7 sentences. | |
22 | }; | |
23 | ||
24 | my $sentences=get_sentences($text); # Get the sentences. | |
25 | foreach my $sent (@$sentences) | |
26 | { | |
27 | $i++; | |
28 | print("SENTENCE $i:$sent\n"); | |
29 | } | |
8 | 30 | |
9 | add_acronyms('lt','gen'); ## adding support for 'Lt. Gen.' | |
10 | my $sentences=get_sentences($text); ## Get the sentences. | |
11 | foreach my $sentence (@$sentences) { | |
12 | ## do something with $sentence | |
13 | } | |
14 | ||
31 | ||
15 | 32 | DESCRIPTION |
16 | 33 | |
17 | The Lingua::EN::Sentence module contains the function get_sentences, which | |
34 | The C<Lingua::EN::Sentence> module contains the function get_sentences, which | |
18 | 35 | splits text into its constituent sentences, based on a regular expression and a |
19 | 36 | list of abbreviations (built in and given). |
20 | 37 | |
22 | 39 | segmentations. But some of them are already integrated into this code and are |
23 | 40 | being taken care of. Still, if you see that there are words causing the |
24 | 41 | get_sentences function to fail, you can add those to the module, so it notices them. |
42 | Note that abbreviations are case sensitive, so 'Mrs.' is recognised but not 'mrs.' | |
43 | ||
25 | 44 | |
26 | 45 | |
27 | 46 | INSTALLATION |