Codebase list ruby-numerizer / upstream/0.2.0+git20220203.1.b1f7f52
Import upstream version 0.2.0+git20220203.1.b1f7f52 Debian Janitor 1 year, 3 months ago
13 changed file(s) with 525 addition(s) and 333 deletion(s). Raw diff Collapse all Expand all
0 source "https://rubygems.org"
1
2 gemspec
0 PATH
1 remote: .
2 specs:
3 numerizer (0.2.0)
4
5 GEM
6 remote: https://rubygems.org/
7 specs:
8 minitest (5.15.0)
9 rake (13.0.6)
10
11 PLATFORMS
12 ruby
13
14 DEPENDENCIES
15 minitest (~> 5.0)
16 numerizer!
17 rake (~> 13)
18
19 BUNDLED WITH
20 2.3.6
00 = Numerizer
1 {<img src="https://github.com/jduff/numerizer/actions/workflows/test.yaml/badge.svg?branch=master" alt="Build Status" />}[https://github.com/jduff/numerizer/actions/workflows/test.yaml?query=branch%3Amaster]
12
2 Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic.
3 Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem https://github.com/mojombo/chronic.
34
45 == Installation
56
6 $ sudo gem sources -a http://gemcutter.org
7 $ sudo gem install numerizer
7 $ gem install numerizer
88
99 == Usage
1010
2020 => "2.375"
2121
2222 == Contributors
23 Tom Preston-Werner, John Duff
23 Tom Preston-Werner, John Duff
00 require 'rubygems'
11 require 'rake'
22
3 begin
4 require 'jeweler'
5 Jeweler::Tasks.new do |gem|
6 gem.name = "numerizer"
7 gem.summary = "Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two)."
8 gem.description = "Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic."
9 gem.email = "duff.john@gmail.com"
10 gem.homepage = "http://github.com/jduff/numerizer"
11 gem.license = 'MIT'
12 gem.authors = ["John Duff"]
13 # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
14 end
15 Jeweler::GemcutterTasks.new
16 rescue LoadError
17 puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
3 $:.unshift File.expand_path('../lib', __FILE__)
4 require 'numerizer/version'
5
6 def version
7 Numerizer::VERSION
188 end
199
2010 require 'rake/testtask'
3626 end
3727 end
3828
39 # task :test => :check_dependencies
29 desc "Release Numerizer version #{version}"
30 task :release => :build do
31 unless `git branch` =~ /^\* master$/
32 puts "You must be on the master branch to release!"
33 exit!
34 end
35 sh "git commit --allow-empty -a -m 'Release #{version}'"
36 sh "git tag v#{version}"
37 sh "git push origin master"
38 sh "git push origin v#{version}"
39 sh "gem push pkg/numerizer-#{version}.gem"
40 end
41
42 desc 'Build a gem from the gemspec'
43 task :build do
44 FileUtils.mkdir_p 'pkg'
45 sh 'gem build numerizer.gemspec'
46 FileUtils.mv("./numerizer-#{version}.gem", "pkg")
47 end
48
4049
4150 task :default => :test
42
43 # require 'rake/rdoctask'
44 # Rake::RDocTask.new do |rdoc|
45 # if File.exist?('VERSION')
46 # version = File.read('VERSION')
47 # else
48 # version = ""
49 # end
50
51 # rdoc.rdoc_dir = 'rdoc'
52 # rdoc.title = "numerizer #{version}"
53 # rdoc.rdoc_files.include('README*')
54 # rdoc.rdoc_files.include('lib/**/*.rb')
55 # end
+0
-1
VERSION less more
0 0.2.0
class Numerizer
  # Gem version string; read by the gemspec and by the Rakefile's release tasks.
  # Frozen so that no caller can mutate the shared constant in place.
  VERSION = '0.2.0'.freeze
end
99 #
1010 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1111
12 require 'strscan'
12 require 'numerizer/version'
13 require 'providers/english_provider'
14 require 'set'
1315
1416 class Numerizer
1517
16 DIRECT_NUMS = [
17 ['eleven', '11'],
18 ['twelve', '12'],
19 ['thirteen', '13'],
20 ['fourteen', '14'],
21 ['fifteen', '15'],
22 ['sixteen', '16'],
23 ['seventeen', '17'],
24 ['eighteen', '18'],
25 ['nineteen', '19'],
26 ['ninteen', '19'], # Common mis-spelling
27 ['zero', '0'],
28 ['ten', '10'],
29 ['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1
30 ]
18 PROVIDERS = {'en' => EnglishProvider.new}
3119
32 SINGLE_NUMS = [
33 ['one', 1],
34 ['two', 2],
35 ['three', 3],
36 ['four', 4],
37 ['five', 5],
38 ['six', 6],
39 ['seven', 7],
40 ['eight', 8],
41 ['nine', 9]
42 ]
43
44 TEN_PREFIXES = [
45 ['twenty', 20],
46 ['thirty', 30],
47 ['forty', 40],
48 ['fourty', 40], # Common misspelling
49 ['fifty', 50],
50 ['sixty', 60],
51 ['seventy', 70],
52 ['eighty', 80],
53 ['ninety', 90]
54 ]
55
56 BIG_PREFIXES = [
57 ['hundred', 100],
58 ['thousand', 1000],
59 ['million', 1_000_000],
60 ['billion', 1_000_000_000],
61 ['trillion', 1_000_000_000_000],
62 ]
63
64 FRACTIONS = [
65 ['half', 2],
66 ['third(s)?', 3],
67 ['fourth(s)?', 4],
68 ['quarter(s)?', 4],
69 ['fifth(s)?', 5],
70 ['sixth(s)?', 6],
71 ['seventh(s)?', 7],
72 ['eighth(s)?', 8],
73 ['nineth(s)?', 9],
74 ]
75
76 SINGLE_ORDINALS = [
77 ['first', 1],
78 ['third', 3],
79 ['fourth', 4],
80 ['fifth', 5],
81 ['sixth', 6],
82 ['seventh', 7],
83 ['eighth', 8],
84 ['ninth', 9]
85 ]
86
87 DIRECT_ORDINALS = [
88 ['tenth', '10'],
89 ['eleventh', '11'],
90 ['twelfth', '12'],
91 ['thirteenth', '13'],
92 ['fourteenth', '14'],
93 ['fifteenth', '15'],
94 ['sixteenth', '16'],
95 ['seventeenth', '17'],
96 ['eighteenth', '18'],
97 ['nineteenth', '19'],
98 ['twentieth', '20'],
99 ['thirtieth', '30'],
100 ['fourtieth', '40'],
101 ['fiftieth', '50'],
102 ['sixtieth', '60'],
103 ['seventieth', '70'],
104 ['eightieth', '80'],
105 ['ninetieth', '90']
106 ]
107
# Converts number words found in +string+ into digits by delegating to the
# provider registered in PROVIDERS for +lang+.
#
# string - text to convert; it is duplicated first, so the caller's object
#          is never modified.
# lang   - language code used as the PROVIDERS key (String or Symbol).
# ignore - word or list of words (String or Symbol) that must be left
#          untouched; nil means "ignore nothing". Words are downcased and
#          handed to the provider as a Set.
# bias   - passed through unchanged to the provider.
#
# Returns whatever the provider's #numerize returns.
# Raises RuntimeError when no provider is registered for +lang+.
def self.numerize(string, lang: 'en', ignore: [], bias: :none)
  provider = PROVIDERS[lang.to_s]
  unless provider
    raise "Language #{lang} not found. Language options include #{PROVIDERS.keys}"
  end

  # Array() tolerates nil and a single bare word; to_s tolerates symbols.
  ignored = Array(ignore).map { |word| word.to_s.downcase }.to_set
  provider.numerize(string.dup, ignore: ignored, bias: bias)
end
16629
16730 end
# Abstract base class for language providers. It fixes the order of the
# conversion pipeline in #numerize and leaves every individual step to the
# subclass; a step that is not overridden raises RuntimeError.
class GenericProvider

  # Runs the conversion pipeline on +str+.
  #
  # str    - the string handed to every step (steps receive the same object).
  # ignore - collection of words the steps should leave untouched.
  # bias   - opaque hint forwarded to the numerize_* steps.
  #
  # Returns the value returned by #postprocess.
  def numerize(str, ignore: [], bias: :none)
    preprocess(str, ignore)
    numerize_numerals(str, ignore, bias)
    numerize_fractions(str, ignore, bias)
    numerize_ordinals(str, ignore, bias)
    numerize_big_prefixes(str, ignore, bias)
    postprocess(str, ignore)
  end

  private

  # --- pipeline steps: each one must be overridden by the subclass ---

  def preprocess(str, ignore)
    raise 'must be implemented in subclass'
  end

  def numerize_numerals(str, ignore, bias)
    raise 'must be implemented in subclass'
  end

  def numerize_fractions(str, ignore, bias)
    raise 'must be implemented in subclass'
  end

  def numerize_ordinals(str, ignore, bias)
    raise 'must be implemented in subclass'
  end

  def numerize_big_prefixes(str, ignore, bias)
    raise 'must be implemented in subclass'
  end

  def postprocess(str, ignore)
    raise 'must be implemented in subclass'
  end

  # Builds one Regexp that matches any of +words+, leaving out every word
  # contained in +ignore+ and, when a block is given, every word for which
  # the block returns a truthy value.
  #
  # The result comes from Regexp.union and therefore carries no IGNORECASE
  # flag. A Regexp interpolated into another regexp literal keeps its own
  # flags, so these words stay case-sensitive even inside an /i pattern.
  #
  # Returns a Regexp (one that never matches when every word is filtered out).
  def regexify(words, ignore: [])
    kept = words.reject do |word|
      ignore.include?(word) || (block_given? && yield(word))
    end
    Regexp.union(kept)
  end

end
0 require 'provider'
1 require 'strscan'
2
# English implementation of the GenericProvider pipeline.
#
# Each step rewrites the string in place. Converted numbers are prefixed with
# a temporary "<num>" marker so later steps can recognise and combine them;
# #postprocess strips the markers at the end.
#
# Replacement strings are built with interpolation rather than by appending
# to string literals, so the class also works when string literals are frozen.
class EnglishProvider < GenericProvider

  DIRECT_NUMS = {
    'eleven' => '11',
    'twelve' => '12',
    'thirteen' => '13',
    'fourteen' => '14',
    'fifteen' => '15',
    'sixteen' => '16',
    'seventeen' => '17',
    'eighteen' => '18',
    'nineteen' => '19',
    'ninteen' => '19', # common misspelling
    'zero' => '0',
    'ten' => '10',
  }.freeze

  SINGLE_NUMS = {
    'one' => 1,
    'two' => 2,
    'three' => 3,
    'four' => 4,
    'five' => 5,
    'six' => 6,
    'seven' => 7,
    'eight' => 8,
    'nine' => 9
  }.freeze

  TEN_PREFIXES = {
    'twenty' => 20,
    'thirty' => 30,
    'forty' => 40,
    'fourty' => 40, # common misspelling
    'fifty' => 50,
    'sixty' => 60,
    'seventy' => 70,
    'eighty' => 80,
    'ninety' => 90
  }.freeze

  BIG_PREFIXES = {
    'hundred' => 100,
    'thousand' => 1000,
    'million' => 1_000_000,
    'billion' => 1_000_000_000,
    'trillion' => 1_000_000_000_000,
  }.freeze

  FRACTIONS = {
    'half' => 2,
    'halves' => 2,
    'quarter' => 4,
    'quarters' => 4
  }.freeze

  # Ordinals that are never read as fraction denominators.
  ORDINALS = {
    'first' => 1,
    'second' => 2,
  }.freeze

  # Words that can be either an ordinal ("the third") or a fraction ("a third").
  SINGLE_ORDINAL_FRACTIONALS = {
    'third' => 3,
    'fourth' => 4,
    'fifth' => 5,
    'sixth' => 6,
    'seventh' => 7,
    'eighth' => 8,
    'ninth' => 9,
  }.freeze

  DIRECT_ORDINAL_FRACTIONALS = {
    'tenth' => '10',
    'eleventh' => '11',
    'twelfth' => '12',
    'thirteenth' => '13',
    'fourteenth' => '14',
    'fifteenth' => '15',
    'sixteenth' => '16',
    'seventeenth' => '17',
    'eighteenth' => '18',
    'nineteenth' => '19',
    'twentieth' => '20',
    'thirtieth' => '30',
    'fourtieth' => '40',
    'fiftieth' => '50',
    'sixtieth' => '60',
    'seventieth' => '70',
    'eightieth' => '80',
    'ninetieth' => '90'
  }.freeze

  ALL_ORDINALS = ORDINALS.merge(SINGLE_ORDINAL_FRACTIONALS).merge(DIRECT_ORDINAL_FRACTIONALS).freeze

  # FRACTIONS plus the plural form ("thirds", "tenths", ...) of every
  # ordinal that can act as a denominator.
  ONLY_PLURAL_FRACTIONS = FRACTIONS.merge(
    SINGLE_ORDINAL_FRACTIONALS.merge(DIRECT_ORDINAL_FRACTIONALS).each_with_object({}) do |(word, value), plurals|
      plurals[word + 's'] = value
    end
  ).freeze
  ALL_FRACTIONS = ONLY_PLURAL_FRACTIONS.merge(SINGLE_ORDINAL_FRACTIONALS).merge(DIRECT_ORDINAL_FRACTIONALS).freeze

  DIRECT_SINGLE_NUMS = DIRECT_NUMS.merge(SINGLE_NUMS).freeze
  DIRECT_NUMS_TEN_PREFIXES = DIRECT_NUMS.merge(TEN_PREFIXES).freeze
  ORDINAL_SINGLE = ORDINALS.merge(SINGLE_ORDINAL_FRACTIONALS).freeze

  # "quarter" is handled apart from the other fractions unless bias is :fractional.
  QUARTER_WORDS = %w[quarter quarters].freeze

  # Kept as plain "a|b|c" strings rather than Regexp.union results: per the
  # original author, a Regexp.union here breaks insertion into the negative
  # lookbehinds below.
  ALL_ORDINALS_REGEX = ALL_ORDINALS.keys.join('|').freeze
  PRONOUNS = %w[i you he she we it they to the].join('|').freeze

  # Normalises spacing and hyphens before any number word is looked at.
  def preprocess(string, ignore)
    string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words
    string.gsub!(/\ba$/, '') && string.rstrip! # doesn't make sense for an 'a' at the end to be a 1
  end

  # Replaces zero..nineteen, the article "a" and the tens words (optionally
  # fused with a following digit or ordinal) by "<num>"-marked digits.
  def numerize_numerals(string, ignore, bias)
    single_nums = regexify(SINGLE_NUMS.keys, ignore: ignore)
    dir_single_nums = regexify(DIRECT_SINGLE_NUMS.keys, ignore: ignore)
    ten_prefs = regexify(TEN_PREFIXES.keys, ignore: ignore)
    dir_nums_ten_prefs = regexify(DIRECT_NUMS_TEN_PREFIXES.keys, ignore: ignore)
    single_ords = regexify(ORDINAL_SINGLE.keys, ignore: ignore)

    # A digit word directly followed by a teen/tens word is read as hundreds:
    # "one fifty" becomes "one hundred fifty".
    string.gsub!(/(^|\W)(#{single_nums})\s(#{dir_nums_ten_prefs})(?=$|\W)/i) { "#{$1}#{$2} hundred #{$3}" }

    # easy/direct replacements
    string.gsub!(/(^|\W)(#{dir_single_nums})(?=$|\W)/i) { "#{$1}<num>#{DIRECT_SINGLE_NUMS[$2]}" }
    if bias == :ordinal
      # With ordinal bias, "a" in front of an ordinal word ("a fourth") is not a 1.
      string.gsub!(/(^|\W)\ba\b(?=$|\W)(?! (?:#{ALL_ORDINALS_REGEX}))/i, '\1<num>1')
    else
      string.gsub!(/(^|\W)\ba\b(?=$|\W)/i, '\1<num>1')
    end

    # ten, twenty, etc.
    string.gsub!(/(^|\W)(#{ten_prefs})(#{single_nums})(?=$|\W)/i) { "#{$1}<num>#{TEN_PREFIXES[$2] + SINGLE_NUMS[$3]}" }
    # Tens followed by an ordinal keep the ordinal's two-letter suffix ("twenty third" -> "23rd").
    string.gsub!(/(^|\W)(#{ten_prefs})(\s)?(#{single_ords})(?=$|\W)/i) { "#{$1}<num>#{TEN_PREFIXES[$2] + ORDINAL_SINGLE[$4]}#{$4[-2, 2]}" }
    string.gsub!(/(^|\W)(#{ten_prefs})(?=$|\W)/i) { "#{$1}<num>#{TEN_PREFIXES[$2]}" }
  end

  # Turns fraction words into "/<denominator>" and then evaluates or completes
  # the resulting fractions.
  #
  # With bias :ordinal only plural forms are fractions; "quarter"/"quarters"
  # are handled separately unless bias is :fractional.
  def numerize_fractions(string, ignore, bias)
    fractionals =
      case bias
      when :ordinal    then regexify(ONLY_PLURAL_FRACTIONS.keys, ignore: ignore + QUARTER_WORDS)
      when :fractional then regexify(ALL_FRACTIONS.keys, ignore: ignore)
      else                  regexify(ALL_FRACTIONS.keys, ignore: ignore + QUARTER_WORDS)
      end
    quarters = regexify(QUARTER_WORDS, ignore: ignore)

    string.gsub!(/a (#{fractionals})(?=$|\W)/i) { "<num>1/#{ALL_FRACTIONS[$1]}" }
    # TODO : Find Noun Distinction for Quarter
    if bias == :fractional
      string.gsub!(/(^|\W)(#{fractionals})(?=$|\W)/i) { "/#{ALL_FRACTIONS[$2]}" }
    else
      # Not after "the" or at the start of a line: those read as ordinals.
      string.gsub!(/(?<!the|^)(\W)(#{fractionals})(?=$|\W)/i) { "/#{ALL_FRACTIONS[$2]}" }
      # Not after a pronoun, "to" or "the": "I quarter" is left alone.
      string.gsub!(/(?<!#{PRONOUNS})(^|\W)(#{quarters})(?=$|\W)/i) { "/#{ALL_FRACTIONS[$2]}" }
    end
    cleanup_fractions(string)
  end

  # Replaces ordinal words by "<num>"-marked digits plus the word's two-letter
  # suffix ("third" -> "3rd"). Unless bias is :ordinal, "second" is converted
  # only when it is not preceded by a digit, an ordinal word or "second".
  def numerize_ordinals(string, ignore, bias)
    # NOTE(review): the other methods compare bias against :fractional
    # (singular), so this guard does not fire for that value and ordinals are
    # still converted under bias: :fractional. Left unchanged on purpose.
    return if bias == :fractionals

    all_ords = regexify(ALL_ORDINALS.keys, ignore: ignore) { |word| word == 'second' && bias != :ordinal }
    if bias != :ordinal && !ignore.include?('second')
      string.gsub!(/(?<!second|\d|#{ALL_ORDINALS_REGEX})(^|\W)second(?=$|\W)/i) { "#{$1}<num>#{ALL_ORDINALS['second']}nd" }
    end
    string.gsub!(/(^|\W)(#{all_ords})(?=$|\W)/i) { "#{$1}<num>#{ALL_ORDINALS[$2]}#{$2[-2, 2]}" }
  end

  # hundreds, thousands, millions, etc.
  # Multiplies each prefix by the number in front of it (or inserts the bare
  # value when there is none), then sums neighbouring numbers after every
  # prefix so smaller magnitudes are folded in before larger ones are handled.
  def numerize_big_prefixes(string, ignore, bias)
    BIG_PREFIXES.each do |word, value|
      next if ignore.include? word.downcase
      string.gsub!(/(?:<num>)?(\d*) *#{word}/i) { $1.empty? ? value.to_s : "<num>#{value * $1.to_i}" }
      andition(string)
    end
  end

  # Final pass: sum remaining neighbours, convert leftover "half", and return
  # a new string with the "<num>" markers removed.
  def postprocess(string, ignore)
    andition(string)
    numerize_halves(string, ignore)
    # Strip away added num tags
    string.gsub(/<num>/, '')
  end

  private

  def cleanup_fractions(string)
    # evaluate fractions when preceded by another number ("2 and 3/8" -> "2.375")
    string.gsub!(/(\d+)(?: | and |-)+(<num>|\s)*(\d+)\s*\/\s*(\d+)/i) { ($1.to_f + ($3.to_f/$4.to_f)).to_s }
    # fix unpreceded fractions: a bare "/4" gets a numerator of 1
    string.gsub!(/(?:^|\W)\/(\d+)/, '1/\1')
    string.gsub!(/(?<=[a-zA-Z])\/(\d+)/, ' 1/\1')
  end

  # always substitute halves (unless 'half' is ignored)
  def numerize_halves(string, ignore)
    return if ignore.include? 'half'
    string.gsub!(/\bhalf\b/i, '1/2')
  end

  # Adds up two adjacent "<num>" values in place when they are joined by
  # " and ", or by a single space with the first number having more digits
  # than the second ("<num>100 <num>50" -> "<num>150"). The scanner is reset
  # after every replacement so the combined value can be merged again.
  def andition(string)
    sc = StringScanner.new(string)
    while sc.scan_until(/<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i)
      next unless sc[2] =~ /and/ || sc[1].size > sc[3].size

      string[(sc.pos - sc.matched_size)..(sc.pos - 1)] = "<num>#{sc[1].to_i + sc[3].to_i}"
      sc.reset
    end
  end

end
0 # Generated by jeweler
1 # DO NOT EDIT THIS FILE DIRECTLY
2 # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
3 # -*- encoding: utf-8 -*-
4 # stub: numerizer 0.2.0 ruby lib
0 $:.unshift File.expand_path('../lib', __FILE__)
1 require 'numerizer/version'
52
63 Gem::Specification.new do |s|
74 s.name = "numerizer"
8 s.version = "0.2.0"
5 s.version = Numerizer::VERSION
96
107 s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
118 s.require_paths = ["lib"]
1714 "LICENSE",
1815 "README.rdoc"
1916 ]
20 s.files = [
21 ".document",
22 "LICENSE",
23 "README.rdoc",
24 "Rakefile",
25 "VERSION",
26 "lib/numerizer.rb",
27 "numerizer.gemspec",
28 "test/test_helper.rb",
29 "test/test_numerizer.rb"
30 ]
17 s.files = `git ls-files`.split($/)
18 s.test_files = `git ls-files -- test`.split($/)
3119 s.homepage = "http://github.com/jduff/numerizer"
3220 s.licenses = ["MIT"]
3321 s.rubygems_version = "2.2.2"
3422 s.summary = "Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two)."
23
24 s.add_development_dependency 'rake', '~> 13'
25 s.add_development_dependency 'minitest', '~> 5.0'
3526 end
3627
0 require 'rubygems'
1 require 'test/unit'
2
30 $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
41 $LOAD_PATH.unshift(File.dirname(__FILE__))
52 require 'numerizer'
63
7 class Test::Unit::TestCase
4 require 'minitest/autorun'
5
6 class TestCase < Minitest::Test
87 end
+0
-130
test/test_numerizer.rb less more
0 require File.join(File.dirname(__FILE__), 'test_helper')
1
2 class NumerizerTest < Test::Unit::TestCase
3 def test_straight_parsing
4 strings = {
5 1 => 'one',
6 5 => 'five',
7 10 => 'ten',
8 11 => 'eleven',
9 12 => 'twelve',
10 13 => 'thirteen',
11 14 => 'fourteen',
12 15 => 'fifteen',
13 16 => 'sixteen',
14 17 => 'seventeen',
15 18 => 'eighteen',
16 19 => 'nineteen',
17 20 => 'twenty',
18 27 => 'twenty seven',
19 31 => 'thirty-one',
20 37 => 'thirty-seven',
21 41 => 'forty one',
22 42 => 'fourty two',
23 59 => 'fifty nine',
24 100 => 'a hundred',
25 100 => 'one hundred',
26 150 => 'one hundred and fifty',
27 # 150 => 'one fifty',
28 200 => 'two-hundred',
29 500 => '5 hundred',
30 999 => 'nine hundred and ninety nine',
31 1_000 => 'one thousand',
32 1_200 => 'twelve hundred',
33 1_200 => 'one thousand two hundred',
34 17_000 => 'seventeen thousand',
35 21_473 => 'twentyone-thousand-four-hundred-and-seventy-three',
36 74_002 => 'seventy four thousand and two',
37 99_999 => 'ninety nine thousand nine hundred ninety nine',
38 100_000 => '100 thousand',
39 250_000 => 'two hundred fifty thousand',
40 1_000_000 => 'one million',
41 1_250_007 => 'one million two hundred fifty thousand and seven',
42 1_000_000_000 => 'one billion',
43 1_000_000_001 => 'one billion and one'
44 }
45
46 strings.keys.sort.each do |key|
47 assert_equal key, Numerizer.numerize(strings[key]).to_i
48 end
49
50 assert_equal "2.5", Numerizer.numerize("two and a half")
51 assert_equal "1/2", Numerizer.numerize("one half")
52 end
53
54 def test_combined_double_digets
55 assert_equal "21", Numerizer.numerize("twentyone")
56 assert_equal "37", Numerizer.numerize("thirtyseven")
57 end
58
59 def test_fractions_in_words
60 assert_equal "1/4", Numerizer.numerize("1 quarter")
61 assert_equal "1/4", Numerizer.numerize("one quarter")
62 assert_equal "1/4", Numerizer.numerize("a quarter")
63 assert_equal "1/8", Numerizer.numerize("one eighth")
64
65 assert_equal "3/4", Numerizer.numerize("three quarters")
66 assert_equal "2/4", Numerizer.numerize("two fourths")
67 assert_equal "3/8", Numerizer.numerize("three eighths")
68 end
69
70 def test_fractional_addition
71 assert_equal "1.25", Numerizer.numerize("one and a quarter")
72 assert_equal "2.375", Numerizer.numerize("two and three eighths")
73 assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
74 end
75
76 def test_word_with_a_number
77 assert_equal "pennyweight", Numerizer.numerize("pennyweight")
78 end
79
80 def test_edges
81 assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am")
82 end
83
84 def test_multiple_slashes_should_not_be_evaluated
85 assert_equal '11/02/2007', Numerizer.numerize('11/02/2007')
86 end
87
88 def test_compatability
89 assert_equal '1/2', Numerizer.numerize('1/2')
90 assert_equal '05/06', Numerizer.numerize('05/06')
91 assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
92 end
93
94 def test_ordinal_strings
95 {
96 'first' => '1st',
97 'second' => 'second',
98 'third' => '3rd',
99 'fifth' => '5th',
100 'seventh' => '7th',
101 'eighth' => '8th',
102 'tenth' => '10th',
103 'eleventh' => '11th',
104 'twelfth' => '12th',
105 'thirteenth' => '13th',
106 'sixteenth' => '16th',
107 'twentieth' => '20th',
108 'twenty-third' => '23rd',
109 'thirtieth' => '30th',
110 'thirty-first' => '31st',
111 'fourtieth' => '40th',
112 'fourty ninth' => '49th',
113 'fiftieth' => '50th',
114 'sixtieth' => '60th',
115 'seventieth' => '70th',
116 'eightieth' => '80th',
117 'ninetieth' => '90th',
118 'hundredth' => '100th',
119 'thousandth' => '1000th',
120 'millionth' => '1000000th',
121 'billionth' => '1000000000th',
122 'trillionth' => '1000000000000th',
123 'first day month two' => '1st day month 2'
124 }.each do |key, val|
125 assert_equal val, Numerizer.numerize(key)
126 end
127 end
128
129 end
0 require File.join(File.dirname(__FILE__), 'test_helper')
1
2 class NumerizerTestEN < TestCase
3 def test_en_argument
4 assert_equal '12', Numerizer.numerize('twelve', lang: 'en')
5 assert_raises RuntimeError do
6 Numerizer.numerize('twelve', lang: 'english')
7 end
8 end
9
# Checks plain cardinal phrases. Each key is the expected integer; the value
# is one phrase or a list of equivalent phrases that must all numerize to it.
def test_straight_parsing
  strings = {
    1 => 'one',
    5 => 'five',
    10 => 'ten',
    11 => 'eleven',
    12 => 'twelve',
    13 => 'thirteen',
    14 => 'fourteen',
    15 => 'fifteen',
    16 => 'sixteen',
    17 => 'seventeen',
    18 => 'eighteen',
    19 => 'nineteen',
    20 => 'twenty',
    27 => 'twenty seven',
    31 => 'thirty-one',
    37 => 'thirty-seven',
    41 => 'forty one',
    42 => 'fourty two',
    59 => 'fifty nine',
    100 => ['one hundred', 'a hundred', 'hundred a'],
    150 => ['one hundred and fifty', 'one fifty'],
    219 => ['two hundred and nineteen', 'two hundred nineteen', 'two nineteen'],
    200 => 'two-hundred',
    500 => '5 hundred',
    999 => 'nine hundred and ninety nine',
    1_000 => 'one thousand',
    1_200 => ['twelve hundred', 'one thousand two hundred'],
    17_000 => 'seventeen thousand',
    21_473 => 'twentyone-thousand-four-hundred-and-seventy-three',
    74_002 => 'seventy four thousand and two',
    99_999 => 'ninety nine thousand nine hundred ninety nine',
    100_000 => '100 thousand',
    250_000 => 'two hundred fifty thousand',
    1_000_000 => 'one million',
    1_250_007 => 'one million two hundred fifty thousand and seven',
    1_000_000_000 => 'one billion',
    1_000_000_001 => 'one billion and one'
  }

  # Distinct names for the outer and inner block parameters: the inner one
  # used to shadow the outer `value`.
  strings.sort.each do |expected, phrases|
    Array(phrases).each do |phrase|
      assert_equal expected, Numerizer.numerize(phrase).to_i
    end
  end

  assert_equal "1/2", Numerizer.numerize("half")
  assert_equal "1/4", Numerizer.numerize("quarter")
end
60
61 def test_combined_double_digets
62 assert_equal "21", Numerizer.numerize("twentyone")
63 assert_equal "37", Numerizer.numerize("thirtyseven")
64 end
65
66 def test_fractions_in_words
67 assert_equal "1/2", Numerizer.numerize("one half")
68
69 assert_equal "1/4", Numerizer.numerize("1 quarter")
70 assert_equal "1/4", Numerizer.numerize("one quarter")
71 assert_equal "1/4", Numerizer.numerize("a quarter")
72 assert_equal "1/8", Numerizer.numerize("one eighth")
73
74 assert_equal "3/4", Numerizer.numerize("three quarters")
75 assert_equal "2/4", Numerizer.numerize("two fourths")
76 assert_equal "3/8", Numerizer.numerize("three eighths")
77 assert_equal "7/10", Numerizer.numerize("seven tenths")
78 end
79
80 def test_fractional_addition
81 assert_equal "1.25", Numerizer.numerize("one and a quarter")
82 assert_equal "2.375", Numerizer.numerize("two and three eighths")
83 assert_equal "2.5", Numerizer.numerize("two and a half")
84 assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
85 end
86
87 def test_word_with_a_number
88 assert_equal "pennyweight", Numerizer.numerize("pennyweight")
89 end
90
91 def test_edges
92 assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am")
93 end
94
95 def test_multiple_slashes_should_not_be_evaluated
96 assert_equal '11/02/2007', Numerizer.numerize('11/02/2007')
97 end
98
99 def test_compatability
100 assert_equal '1/2', Numerizer.numerize('1/2')
101 assert_equal '05/06', Numerizer.numerize('05/06')
102 assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
103 assert_equal "1/2 an hour", Numerizer.numerize("half an hour")
104 end
105
106 def test_ordinal_strings
107 {
108 'first' => '1st',
109 'second' => '2nd',
110 'third' => '3rd',
111 'fourth' => '4th',
112 'fifth' => '5th',
113 'seventh' => '7th',
114 'eighth' => '8th',
115 'tenth' => '10th',
116 'eleventh' => '11th',
117 'twelfth' => '12th',
118 'thirteenth' => '13th',
119 'sixteenth' => '16th',
120 'twentieth' => '20th',
121 'twenty-third' => '23rd',
122 'thirtieth' => '30th',
123 'thirty-first' => '31st',
124 'fourtieth' => '40th',
125 'fourty ninth' => '49th',
126 'fiftieth' => '50th',
127 'sixtieth' => '60th',
128 'seventieth' => '70th',
129 'eightieth' => '80th',
130 'ninetieth' => '90th',
131 'hundredth' => '100th',
132 'thousandth' => '1000th',
133 'millionth' => '1000000th',
134 'billionth' => '1000000000th',
135 'trillionth' => '1000000000000th',
136 'first day month two' => '1st day month 2'
137 }.each do |key, val|
138 assert_equal val, Numerizer.numerize(key)
139 end
140 end
141
142 def test_ambiguous_cases
143 # Quarter ( Coin ) is Untested
144 # Second ( Time / Verb ) is Untested
145 assert_equal 'the 4th', Numerizer.numerize('the fourth')
146 assert_equal '1/3 of', Numerizer.numerize('a third of')
147 assert_equal '4th', Numerizer.numerize('fourth')
148 assert_equal '2nd', Numerizer.numerize('second')
149 assert_equal 'I quarter', Numerizer.numerize('I quarter')
150 assert_equal 'You quarter', Numerizer.numerize('You quarter')
151 assert_equal 'I want to quarter', Numerizer.numerize('I want to quarter')
152 assert_equal 'the 1st 1/4', Numerizer.numerize('the first quarter')
153 assert_equal '1/4 pound of beef', Numerizer.numerize('quarter pound of beef')
154 assert_equal 'the 2nd second', Numerizer.numerize('the second second')
155 assert_equal 'the 4th second', Numerizer.numerize('the fourth second')
156 assert_equal '1 second', Numerizer.numerize('one second')
157
158 # TODO: Find way to distinguish this verb
159 # assert_equal 'I peel and quarter bananas', Numerizer.numerize('I peel and quarter bananas')
160 end
161
162 def test_ignore
163 assert_equal 'the second day of march', Numerizer.numerize('the second day of march', ignore: ['second'])
164 assert_equal 'quarter', Numerizer.numerize('quarter', ignore: ['quarter'])
165 assert_equal 'the five guys', Numerizer.numerize('the five guys', ignore: ['five'])
166 assert_equal 'the fifty 2', Numerizer.numerize('the fifty two', ignore: ['fifty'])
167 end
168
169 def test_bias_ordinal
170 assert_equal '4th', Numerizer.numerize('fourth', bias: :ordinal)
171 assert_equal '12th', Numerizer.numerize('twelfth', bias: :ordinal)
172 assert_equal '2nd', Numerizer.numerize('second', bias: :ordinal)
173 assert_equal 'the 4th', Numerizer.numerize('the fourth', bias: :ordinal)
174 assert_equal '2.75', Numerizer.numerize('two and three fourths', bias: :ordinal)
175 assert_equal '3/5', Numerizer.numerize('three fifths', bias: :ordinal)
176 assert_equal 'a 4th of', Numerizer.numerize('a fourth of', bias: :ordinal)
177 assert_equal 'I quarter your home', Numerizer.numerize('I quarter your home', bias: :ordinal)
178 assert_equal 'the 1st 2nd 3rd', Numerizer.numerize('the first second third', bias: :ordinal)
179 end
180
181 def test_bias_fractional
182 assert_equal '1/4', Numerizer.numerize('fourth', bias: :fractional)
183 assert_equal '1/12', Numerizer.numerize('twelfth', bias: :fractional)
184 assert_equal '2nd', Numerizer.numerize('second', bias: :fractional)
185 assert_equal 'the 1/4', Numerizer.numerize('the fourth', bias: :fractional)
186 assert_equal '2.75', Numerizer.numerize('two and three fourths', bias: :fractional)
187 assert_equal '3/5', Numerizer.numerize('three fifths', bias: :fractional)
188 assert_equal '1/4 of', Numerizer.numerize('a fourth of', bias: :fractional)
189 assert_equal 'I 1/4 your home', Numerizer.numerize('I quarter your home', bias: :fractional)
190 assert_equal 'the 1st second 1/3', Numerizer.numerize('the first second third', bias: :fractional)
191 end
192 end