Commit upstream/0.2.0+git20220203.1.b1f7f52 - ruby-numerizer

Import upstream version 0.2.0+git20220203.1.b1f7f52 Debian Janitor 1 year, 3 months ago

13 changed file(s) with 525 addition(s) and 333 deletion(s). Raw diff Collapse all Expand all

-0

Gemfile less more

	0	source "https://rubygems.org"
	1
	2	gemspec

+21

-0

Gemfile.lock less more

	0	PATH
	1	remote: .
	2	specs:
	3	numerizer (0.2.0)
	4
	5	GEM
	6	remote: https://rubygems.org/
	7	specs:
	8	minitest (5.15.0)
	9	rake (13.0.6)
	10
	11	PLATFORMS
	12	ruby
	13
	14	DEPENDENCIES
	15	minitest (~> 5.0)
	16	numerizer!
	17	rake (~> 13)
	18
	19	BUNDLED WITH
	20	2.3.6

-4

README.rdoc less more

0	0	= Numerizer
	1	{<img src="https://github.com/jduff/numerizer/actions/workflows/test.yaml/badge.svg?branch=master" alt="Build Status" />}[https://github.com/jduff/numerizer/actions/workflows/test.yaml?query=branch%3Amaster]
1	2
2		Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic.
	3	Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem https://github.com/mojombo/chronic.
3	4
4	5	== Installation
5	6
6		$ sudo gem sources -a http://gemcutter.org
7		$ sudo gem install numerizer
	7	$ gem install numerizer
8	8
9	9	== Usage
10	10

20	20	=> "2.375"
21	21
22	22	== Contributors
23		Tom Preston-Werner, John Duff⏎
	23	Tom Preston-Werner, John Duff

+25

-30

Rakefile less more

0	0	require 'rubygems'
1	1	require 'rake'
2	2
3		begin
4		require 'jeweler'
5		Jeweler::Tasks.new do \|gem\|
6		gem.name = "numerizer"
7		gem.summary = "Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two)."
8		gem.description = "Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic."
9		gem.email = "duff.john@gmail.com"
10		gem.homepage = "http://github.com/jduff/numerizer"
11		gem.license = 'MIT'
12		gem.authors = ["John Duff"]
13		# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
14		end
15		Jeweler::GemcutterTasks.new
16		rescue LoadError
17		puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
	3	$:.unshift File.expand_path('../lib', __FILE__)
	4	require 'numerizer/version'
	5
	6	def version
	7	Numerizer::VERSION
18	8	end
19	9
20	10	require 'rake/testtask'

36	26	end
37	27	end
38	28
39		# task :test => :check_dependencies
	29	desc "Release Numerizer version #{version}"
	30	task :release => :build do
	31	unless `git branch` =~ /^\* master$/
	32	puts "You must be on the master branch to release!"
	33	exit!
	34	end
	35	sh "git commit --allow-empty -a -m 'Release #{version}'"
	36	sh "git tag v#{version}"
	37	sh "git push origin master"
	38	sh "git push origin v#{version}"
	39	sh "gem push pkg/numerizer-#{version}.gem"
	40	end
	41
	42	desc 'Build a gem from the gemspec'
	43	task :build do
	44	FileUtils.mkdir_p 'pkg'
	45	sh 'gem build numerizer.gemspec'
	46	FileUtils.mv("./numerizer-#{version}.gem", "pkg")
	47	end
	48
40	49
41	50	task :default => :test
42
43		# require 'rake/rdoctask'
44		# Rake::RDocTask.new do \|rdoc\|
45		# if File.exist?('VERSION')
46		# version = File.read('VERSION')
47		# else
48		# version = ""
49		# end
50
51		# rdoc.rdoc_dir = 'rdoc'
52		# rdoc.title = "numerizer #{version}"
53		# rdoc.rdoc_files.include('README*')
54		# rdoc.rdoc_files.include('lib/*/.rb')
55		# end

-1

~~VERSION~~ less more

0.2.0⏎

-0

lib/numerizer/version.rb less more

	0	class Numerizer
	1	VERSION = '0.2.0'
	2	end

+10

-147

lib/numerizer.rb less more

9	9	#
10	10	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
11	11
12		require 'strscan'
	12	require 'numerizer/version'
	13	require 'providers/english_provider'
	14	require 'set'
13	15
14	16	class Numerizer
15	17
16		DIRECT_NUMS = [
17		['eleven', '11'],
18		['twelve', '12'],
19		['thirteen', '13'],
20		['fourteen', '14'],
21		['fifteen', '15'],
22		['sixteen', '16'],
23		['seventeen', '17'],
24		['eighteen', '18'],
25		['nineteen', '19'],
26		['ninteen', '19'], # Common mis-spelling
27		['zero', '0'],
28		['ten', '10'],
29		['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1
30		]
	18	PROVIDERS = {'en' => EnglishProvider.new}
31	19
32		SINGLE_NUMS = [
33		['one', 1],
34		['two', 2],
35		['three', 3],
36		['four', 4],
37		['five', 5],
38		['six', 6],
39		['seven', 7],
40		['eight', 8],
41		['nine', 9]
42		]
43
44		TEN_PREFIXES = [
45		['twenty', 20],
46		['thirty', 30],
47		['forty', 40],
48		['fourty', 40], # Common misspelling
49		['fifty', 50],
50		['sixty', 60],
51		['seventy', 70],
52		['eighty', 80],
53		['ninety', 90]
54		]
55
56		BIG_PREFIXES = [
57		['hundred', 100],
58		['thousand', 1000],
59		['million', 1_000_000],
60		['billion', 1_000_000_000],
61		['trillion', 1_000_000_000_000],
62		]
63
64		FRACTIONS = [
65		['half', 2],
66		['third(s)?', 3],
67		['fourth(s)?', 4],
68		['quarter(s)?', 4],
69		['fifth(s)?', 5],
70		['sixth(s)?', 6],
71		['seventh(s)?', 7],
72		['eighth(s)?', 8],
73		['nineth(s)?', 9],
74		]
75
76		SINGLE_ORDINALS = [
77		['first', 1],
78		['third', 3],
79		['fourth', 4],
80		['fifth', 5],
81		['sixth', 6],
82		['seventh', 7],
83		['eighth', 8],
84		['ninth', 9]
85		]
86
87		DIRECT_ORDINALS = [
88		['tenth', '10'],
89		['eleventh', '11'],
90		['twelfth', '12'],
91		['thirteenth', '13'],
92		['fourteenth', '14'],
93		['fifteenth', '15'],
94		['sixteenth', '16'],
95		['seventeenth', '17'],
96		['eighteenth', '18'],
97		['nineteenth', '19'],
98		['twentieth', '20'],
99		['thirtieth', '30'],
100		['fourtieth', '40'],
101		['fiftieth', '50'],
102		['sixtieth', '60'],
103		['seventieth', '70'],
104		['eightieth', '80'],
105		['ninetieth', '90']
106		]
107
108		def self.numerize(string)
	20	def self.numerize(string,lang: 'en', ignore: [], bias: :none)
109	21	string = string.dup
110
111		# preprocess
112		string.gsub!(/ +\|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words
113
114		# easy/direct replacements
115		(DIRECT_NUMS + SINGLE_NUMS).each do \|dn\|
116		string.gsub!(/(^\|\W)#{dn[0]}(?=$\|\W)/i, '\1<num>' + dn[1].to_s)
	22	ignore = ignore.map(&:downcase).to_set
	23	provider = PROVIDERS[lang]
	24	if provider == nil
	25	raise "Language #{lang} not found. Language options include #{PROVIDERS.keys}"
117	26	end
118
119		# ten, twenty, etc.
120		TEN_PREFIXES.each do \|tp\|
121		SINGLE_NUMS.each do \|dn\|
122		string.gsub!(/(^\|\W)#{tp[0]}#{dn[0]}(?=$\|\W)/i, '\1<num>' + (tp[1] + dn[1]).to_s)
123		end
124		SINGLE_ORDINALS.each do \|dn\|
125		string.gsub!(/(^\|\W)#{tp[0]}(\s)?#{dn[0]}(?=$\|\W)/i, '\1<num>' + (tp[1] + dn[1]).to_s + dn[0][-2, 2])
126		end
127		string.gsub!(/(^\|\W)#{tp[0]}(?=$\|\W)/i, '\1<num>' + tp[1].to_s)
128		end
129
130		# handle fractions
131		FRACTIONS.each do \|tp\|
132		string.gsub!(/a #{tp[0]}(?=$\|\W)/i, '<num>1/' + tp[1].to_s)
133		string.gsub!(/\s#{tp[0]}(?=$\|\W)/i, '/' + tp[1].to_s)
134		end
135
136		(DIRECT_ORDINALS + SINGLE_ORDINALS).each do \|on\|
137		string.gsub!(/(^\|\W)#{on[0]}(?=$\|\W)/i, '\1<num>' + on[1].to_s + on[0][-2, 2])
138		end
139
140		# evaluate fractions when preceded by another number
141		string.gsub!(/(\d+)(?: \| and \|-)+(<num>\|\s)(\d+)\s\/\s*(\d+)/i) { ($1.to_f + ($3.to_f/$4.to_f)).to_s }
142
143		# hundreds, thousands, millions, etc.
144		BIG_PREFIXES.each do \|bp\|
145		string.gsub!(/(?:<num>)?(\d) #{bp[0]}/i) { $1.empty? ? bp[1] : '<num>' + (bp[1] * $1.to_i).to_s }
146		andition(string)
147		end
148
149		andition(string)
150
151		string.gsub(/<num>/, '')
152		end
153
154		class << self
155		private
156		def andition(string)
157		sc = StringScanner.new(string)
158		while(sc.scan_until(/<num>(\d+)( \| and )<num>(\d+)(?=[^\w]\|$)/i))
159		if sc[2] =~ /and/ \|\| sc[1].size > sc[3].size
160		string[(sc.pos - sc.matched_size)..(sc.pos-1)] = '<num>' + (sc[1].to_i + sc[3].to_i).to_s
161		sc.reset
162		end
163		end
164		end
	27	provider.numerize(string, ignore: ignore, bias: bias)
165	28	end
166	29
167	30	end

+43

-0

lib/provider.rb less more

	0	class GenericProvider
	1
	2	def numerize(str, ignore: [], bias: :none)
	3	preprocess(str, ignore)
	4	numerize_numerals(str, ignore, bias)
	5	numerize_fractions(str, ignore, bias)
	6	numerize_ordinals(str, ignore, bias)
	7	numerize_big_prefixes(str, ignore, bias)
	8	postprocess(str, ignore)
	9	end
	10
	11	private
	12
	13	def preprocess(str, ignore)
	14	raise 'must be implemented in subclass'
	15	end
	16	def numerize_numerals(str, ignore, bias)
	17	raise 'must be implemented in subclass'
	18	end
	19	def numerize_fractions(str, ignore, bias)
	20	raise 'must be implemented in subclass'
	21	end
	22	def numerize_ordinals(str, ignore, bias)
	23	raise 'must be implemented in subclass'
	24	end
	25	def numerize_big_prefixes(str, ignore, bias)
	26	raise 'must be implemented in subclass'
	27	end
	28	def postprocess(str, ignore)
	29	raise 'must be implemented in subclass'
	30	end
	31
	32	# Turns list of words into a unionized list, ignoring words specified in
	33	# arguments or that meet the conditions of the yield block
	34	def regexify(words, ignore:[])
	35	if block_given?
	36	return Regexp.union(words.reject { \|x\| ignore.include?(x) \|\| yield(x) })
	37	else
	38	return Regexp.union(words.reject { \|x\| ignore.include?(x) })
	39	end
	40	end
	41
	42	end

+212

-0

lib/providers/english_provider.rb less more

	0	require 'provider'
	1	require 'strscan'
	2
	3	class EnglishProvider < GenericProvider
	4
	5	DIRECT_NUMS = {
	6	'eleven' => '11',
	7	'twelve' => '12',
	8	'thirteen' => '13',
	9	'fourteen' => '14',
	10	'fifteen' => '15',
	11	'sixteen' => '16',
	12	'seventeen' => '17',
	13	'eighteen' => '18',
	14	'nineteen' => '19',
	15	'ninteen' => '19',
	16	'zero' => '0',
	17	'ten' => '10',
	18	}
	19
	20	SINGLE_NUMS = {
	21	'one' => 1,
	22	'two' => 2,
	23	'three' => 3,
	24	'four' => 4,
	25	'five' => 5,
	26	'six' => 6,
	27	'seven' => 7,
	28	'eight' => 8,
	29	'nine' => 9
	30	}
	31
	32	TEN_PREFIXES = {
	33	'twenty' => 20,
	34	'thirty' => 30,
	35	'forty' => 40,
	36	'fourty' => 40,
	37	'fifty' => 50,
	38	'sixty' => 60,
	39	'seventy' => 70,
	40	'eighty' => 80,
	41	'ninety' => 90
	42	}
	43
	44	BIG_PREFIXES = {
	45	'hundred' => 100,
	46	'thousand' => 1000,
	47	'million' => 1_000_000,
	48	'billion' => 1_000_000_000,
	49	'trillion' => 1_000_000_000_000,
	50	}
	51
	52	FRACTIONS = {
	53	'half' => 2,
	54	'halves' => 2,
	55	'quarter' => 4,
	56	'quarters' => 4
	57	}
	58
	59	ORDINALS = {
	60	'first' => 1,
	61	'second' => 2,
	62	}
	63
	64	SINGLE_ORDINAL_FRACTIONALS = {
	65	'third' => 3,
	66	'fourth' => 4,
	67	'fifth' => 5,
	68	'sixth' => 6,
	69	'seventh' => 7,
	70	'eighth' => 8,
	71	'ninth' => 9,
	72	}
	73
	74	DIRECT_ORDINAL_FRACTIONALS = {
	75	'tenth' => '10',
	76	'eleventh' => '11',
	77	'twelfth' => '12',
	78	'thirteenth' => '13',
	79	'fourteenth' => '14',
	80	'fifteenth' => '15',
	81	'sixteenth' => '16',
	82	'seventeenth' => '17',
	83	'eighteenth' => '18',
	84	'nineteenth' => '19',
	85	'twentieth' => '20',
	86	'thirtieth' => '30',
	87	'fourtieth' => '40',
	88	'fiftieth' => '50',
	89	'sixtieth' => '60',
	90	'seventieth' => '70',
	91	'eightieth' => '80',
	92	'ninetieth' => '90'
	93	}
	94
	95	ALL_ORDINALS = ORDINALS.merge(SINGLE_ORDINAL_FRACTIONALS).merge(DIRECT_ORDINAL_FRACTIONALS)
	96	ONLY_PLURAL_FRACTIONS = FRACTIONS.merge((SINGLE_ORDINAL_FRACTIONALS.merge(DIRECT_ORDINAL_FRACTIONALS)).inject({ }) {\|h, (k,v)\| h[k + 's'] = v ; h})
	97	ALL_FRACTIONS = ONLY_PLURAL_FRACTIONS.merge(SINGLE_ORDINAL_FRACTIONALS).merge(DIRECT_ORDINAL_FRACTIONALS)
	98
	99	DIRECT_SINGLE_NUMS = DIRECT_NUMS.merge(SINGLE_NUMS)
	100	DIRECT_NUMS_TEN_PREFIXES = DIRECT_NUMS.merge(TEN_PREFIXES)
	101	ORDINAL_SINGLE = ORDINALS.merge(SINGLE_ORDINAL_FRACTIONALS)
	102
	103	# REGEXP.UNION here breaks insertion into negative Lookbehind
	104	ALL_ORDINALS_REGEX = ALL_ORDINALS.keys.reduce {\|a,b\| a + '\|' + b}
	105	PRONOUNS = ['i','you','he','she','we','it','you','they','to','the'].reduce {\|a,b\| a + '\|' + b}
	106
	107	def preprocess(string, ignore)
	108	string.gsub!(/ +\|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words
	109	string.gsub!(/\ba$/, '') && string.rstrip! # doesn't make sense for an 'a' at the end to be a 1
	110	end
	111
	112	def numerize_numerals(string, ignore, bias)
	113	single_nums = regexify(SINGLE_NUMS.keys, ignore: ignore)
	114	dir_single_nums = regexify(DIRECT_SINGLE_NUMS.keys, ignore: ignore)
	115	ten_prefs = regexify(TEN_PREFIXES.keys, ignore: ignore)
	116	dir_nums_ten_prefs = regexify(DIRECT_NUMS_TEN_PREFIXES.keys, ignore: ignore)
	117	single_ords = regexify(ORDINAL_SINGLE.keys, ignore: ignore)
	118
	119	# easy/direct replacements
	120	string.gsub!(/(^\|\W)(#{single_nums})\s(#{dir_nums_ten_prefs})(?=$\|\W)/i) {$1 << $2 << ' hundred ' << $3}
	121	string.gsub!(/(^\|\W)(#{dir_single_nums})(?=$\|\W)/i) { $1 << '<num>' << DIRECT_SINGLE_NUMS[$2].to_s}
	122	if bias == :ordinal
	123	string.gsub!(/(^\|\W)\ba\b(?=$\|\W)(?! (?:#{ALL_ORDINALS_REGEX}))/i, '\1<num>' + 1.to_s)
	124	else
	125	string.gsub!(/(^\|\W)\ba\b(?=$\|\W)/i, '\1<num>' + 1.to_s)
	126	end
	127
	128	# ten, twenty, etc.
	129	string.gsub!(/(^\|\W)(#{ten_prefs})(#{single_nums})(?=$\|\W)/i) { $1 << '<num>' << (TEN_PREFIXES[$2] + SINGLE_NUMS[$3]).to_s}
	130	string.gsub!(/(^\|\W)(#{ten_prefs})(\s)?(#{single_ords})(?=$\|\W)/i) { $1 << '<num>' << (TEN_PREFIXES[$2] + ORDINAL_SINGLE[$4]).to_s << $4[-2, 2]}
	131	string.gsub!(/(^\|\W)(#{ten_prefs})(?=$\|\W)/i) { $1 << '<num>' << TEN_PREFIXES[$2].to_s}
	132	end
	133
	134	def numerize_fractions(string, ignore, bias)
	135	# handle fractions
	136	# only plural fractions if ordinal mode
	137	# Ignore quarter to be handled seperately if not fractional mode
	138	if bias == :ordinal
	139	fractionals = regexify(ONLY_PLURAL_FRACTIONS.keys, ignore: ignore + ['quarter', 'quarters'])
	140	elsif bias == :fractional
	141	fractionals = regexify(ALL_FRACTIONS.keys, ignore: ignore)
	142	else
	143	fractionals = regexify(ALL_FRACTIONS.keys, ignore: ignore + ['quarter', 'quarters'])
	144	end
	145	quarters = regexify(['quarter', 'quarters'], ignore: ignore)
	146
	147	string.gsub!(/a (#{fractionals})(?=$\|\W)/i) {'<num>1/' << ALL_FRACTIONS[$1].to_s}
	148	# TODO : Find Noun Distinction for Quarter
	149	if bias == :fractional
	150	string.gsub!(/(^\|\W)(#{fractionals})(?=$\|\W)/i) {'/' << ALL_FRACTIONS[$2].to_s}
	151	else
	152	string.gsub!(/(?<!the\|^)(\W)(#{fractionals})(?=$\|\W)/i) { '/' << ALL_FRACTIONS[$2].to_s }
	153	string.gsub!(/(?<!#{PRONOUNS})(^\|\W)(#{quarters})(?=$\|\W)/i) { '/' << ALL_FRACTIONS[$2].to_s }
	154	end
	155	cleanup_fractions(string)
	156	end
	157
	158
	159	def numerize_ordinals(string, ignore, bias)
	160	return if bias == :fractionals
	161	all_ords = regexify(ALL_ORDINALS.keys, ignore: ignore) {\|x\| x == 'second' && bias != :ordinal }
	162	if bias != :ordinal && !ignore.include?('second')
	163	string.gsub!(/(?<!second\|\d\|#{ALL_ORDINALS_REGEX})(^\|\W)second(?=$\|\W)/i) { $1 << '<num>' << ALL_ORDINALS['second'].to_s << 'second'[-2, 2] }
	164	end
	165	string.gsub!(/(^\|\W)(#{all_ords})(?=$\|\W)/i) { $1 << '<num>' << ALL_ORDINALS[$2].to_s << $2[-2, 2]}
	166	end
	167
	168	# hundreds, thousands, millions, etc.
	169	def numerize_big_prefixes(string, ignore, bias)
	170	# big_prefs = regexify(BIG_PREFIXES.keys, ignore: ignore)
	171	BIG_PREFIXES.each do \|k,v\|
	172	next if ignore.include? k.downcase
	173	string.gsub!(/(?:<num>)?(\d) #{k}/i) { $1.empty? ? v : '<num>' << (v * $1.to_i).to_s }
	174	andition(string)
	175	end
	176	end
	177
	178	def postprocess(string, ignore)
	179	andition(string)
	180	numerize_halves(string, ignore)
	181	#Strip Away Added Num Tags
	182	string.gsub(/<num>/, '')
	183	end
	184
	185	private
	186
	187	def cleanup_fractions(string)
	188	# evaluate fractions when preceded by another number
	189	string.gsub!(/(\d+)(?: \| and \|-)+(<num>\|\s)(\d+)\s\/\s*(\d+)/i) { ($1.to_f + ($3.to_f/$4.to_f)).to_s }
	190	# fix unpreceeded fractions
	191	string.gsub!(/(?:^\|\W)\/(\d+)/, '1/\1')
	192	string.gsub!(/(?<=[a-zA-Z])\/(\d+)/, ' 1/\1')
	193	end
	194
	195	# always substitute halfs
	196	def numerize_halves(string, ignore)
	197	return if ignore.include? 'half'
	198	string.gsub!(/\bhalf\b/i, '1/2')
	199	end
	200
	201	def andition(string)
	202	sc = StringScanner.new(string)
	203	while(sc.scan_until(/<num>(\d+)( \| and )<num>(\d+)(?=[^\w]\|$)/i))
	204	if sc[2] =~ /and/ \|\| sc[1].size > sc[3].size
	205	string[(sc.pos - sc.matched_size)..(sc.pos-1)] = '<num>' << (sc[1].to_i + sc[3].to_i).to_s
	206	sc.reset
	207	end
	208	end
	209	end
	210
	211	end

-17

numerizer.gemspec less more

0		# Generated by jeweler
1		# DO NOT EDIT THIS FILE DIRECTLY
2		# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
3		# -- encoding: utf-8 --
4		# stub: numerizer 0.2.0 ruby lib
	0	$:.unshift File.expand_path('../lib', __FILE__)
	1	require 'numerizer/version'
5	2
6	3	Gem::Specification.new do \|s\|
7	4	s.name = "numerizer"
8		s.version = "0.2.0"
	5	s.version = Numerizer::VERSION
9	6
10	7	s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11	8	s.require_paths = ["lib"]

17	14	"LICENSE",
18	15	"README.rdoc"
19	16	]
20		s.files = [
21		".document",
22		"LICENSE",
23		"README.rdoc",
24		"Rakefile",
25		"VERSION",
26		"lib/numerizer.rb",
27		"numerizer.gemspec",
28		"test/test_helper.rb",
29		"test/test_numerizer.rb"
30		]
	17	s.files = `git ls-files`.split($/)
	18	s.test_files = `git ls-files -- test`.split($/)
31	19	s.homepage = "http://github.com/jduff/numerizer"
32	20	s.licenses = ["MIT"]
33	21	s.rubygems_version = "2.2.2"
34	22	s.summary = "Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two)."
	23
	24	s.add_development_dependency 'rake', '~> 13'
	25	s.add_development_dependency 'minitest', '~> 5.0'
35	26	end
36	27

-4

test/test_helper.rb less more

0		require 'rubygems'
1		require 'test/unit'
2
3	0	$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
4	1	$LOAD_PATH.unshift(File.dirname(__FILE__))
5	2	require 'numerizer'
6	3
7		class Test::Unit::TestCase
	4	require 'minitest/autorun'
	5
	6	class TestCase < Minitest::Test
8	7	end

-130

~~test/test_numerizer.rb~~ less more

0		require File.join(File.dirname(__FILE__), 'test_helper')
1
2		class NumerizerTest < Test::Unit::TestCase
3		def test_straight_parsing
4		strings = {
5		1 => 'one',
6		5 => 'five',
7		10 => 'ten',
8		11 => 'eleven',
9		12 => 'twelve',
10		13 => 'thirteen',
11		14 => 'fourteen',
12		15 => 'fifteen',
13		16 => 'sixteen',
14		17 => 'seventeen',
15		18 => 'eighteen',
16		19 => 'nineteen',
17		20 => 'twenty',
18		27 => 'twenty seven',
19		31 => 'thirty-one',
20		37 => 'thirty-seven',
21		41 => 'forty one',
22		42 => 'fourty two',
23		59 => 'fifty nine',
24		100 => 'a hundred',
25		100 => 'one hundred',
26		150 => 'one hundred and fifty',
27		# 150 => 'one fifty',
28		200 => 'two-hundred',
29		500 => '5 hundred',
30		999 => 'nine hundred and ninety nine',
31		1_000 => 'one thousand',
32		1_200 => 'twelve hundred',
33		1_200 => 'one thousand two hundred',
34		17_000 => 'seventeen thousand',
35		21_473 => 'twentyone-thousand-four-hundred-and-seventy-three',
36		74_002 => 'seventy four thousand and two',
37		99_999 => 'ninety nine thousand nine hundred ninety nine',
38		100_000 => '100 thousand',
39		250_000 => 'two hundred fifty thousand',
40		1_000_000 => 'one million',
41		1_250_007 => 'one million two hundred fifty thousand and seven',
42		1_000_000_000 => 'one billion',
43		1_000_000_001 => 'one billion and one'
44		}
45
46		strings.keys.sort.each do \|key\|
47		assert_equal key, Numerizer.numerize(strings[key]).to_i
48		end
49
50		assert_equal "2.5", Numerizer.numerize("two and a half")
51		assert_equal "1/2", Numerizer.numerize("one half")
52		end
53
54		def test_combined_double_digets
55		assert_equal "21", Numerizer.numerize("twentyone")
56		assert_equal "37", Numerizer.numerize("thirtyseven")
57		end
58
59		def test_fractions_in_words
60		assert_equal "1/4", Numerizer.numerize("1 quarter")
61		assert_equal "1/4", Numerizer.numerize("one quarter")
62		assert_equal "1/4", Numerizer.numerize("a quarter")
63		assert_equal "1/8", Numerizer.numerize("one eighth")
64
65		assert_equal "3/4", Numerizer.numerize("three quarters")
66		assert_equal "2/4", Numerizer.numerize("two fourths")
67		assert_equal "3/8", Numerizer.numerize("three eighths")
68		end
69
70		def test_fractional_addition
71		assert_equal "1.25", Numerizer.numerize("one and a quarter")
72		assert_equal "2.375", Numerizer.numerize("two and three eighths")
73		assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
74		end
75
76		def test_word_with_a_number
77		assert_equal "pennyweight", Numerizer.numerize("pennyweight")
78		end
79
80		def test_edges
81		assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am")
82		end
83
84		def test_multiple_slashes_should_not_be_evaluated
85		assert_equal '11/02/2007', Numerizer.numerize('11/02/2007')
86		end
87
88		def test_compatability
89		assert_equal '1/2', Numerizer.numerize('1/2')
90		assert_equal '05/06', Numerizer.numerize('05/06')
91		assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
92		end
93
94		def test_ordinal_strings
95		{
96		'first' => '1st',
97		'second' => 'second',
98		'third' => '3rd',
99		'fifth' => '5th',
100		'seventh' => '7th',
101		'eighth' => '8th',
102		'tenth' => '10th',
103		'eleventh' => '11th',
104		'twelfth' => '12th',
105		'thirteenth' => '13th',
106		'sixteenth' => '16th',
107		'twentieth' => '20th',
108		'twenty-third' => '23rd',
109		'thirtieth' => '30th',
110		'thirty-first' => '31st',
111		'fourtieth' => '40th',
112		'fourty ninth' => '49th',
113		'fiftieth' => '50th',
114		'sixtieth' => '60th',
115		'seventieth' => '70th',
116		'eightieth' => '80th',
117		'ninetieth' => '90th',
118		'hundredth' => '100th',
119		'thousandth' => '1000th',
120		'millionth' => '1000000th',
121		'billionth' => '1000000000th',
122		'trillionth' => '1000000000000th',
123		'first day month two' => '1st day month 2'
124		}.each do \|key, val\|
125		assert_equal val, Numerizer.numerize(key)
126		end
127		end
128
129		end

+193

-0

test/test_numerizer_en.rb less more

	0	require File.join(File.dirname(__FILE__), 'test_helper')
	1
	2	class NumerizerTestEN < TestCase
	3	def test_en_argument
	4	assert_equal '12', Numerizer.numerize('twelve', lang: 'en')
	5	assert_raises RuntimeError do
	6	Numerizer.numerize('twelve', lang: 'english')
	7	end
	8	end
	9
	10	def test_straight_parsing
	11	strings = {
	12	1 => 'one',
	13	5 => 'five',
	14	10 => 'ten',
	15	11 => 'eleven',
	16	12 => 'twelve',
	17	13 => 'thirteen',
	18	14 => 'fourteen',
	19	15 => 'fifteen',
	20	16 => 'sixteen',
	21	17 => 'seventeen',
	22	18 => 'eighteen',
	23	19 => 'nineteen',
	24	20 => 'twenty',
	25	27 => 'twenty seven',
	26	31 => 'thirty-one',
	27	37 => 'thirty-seven',
	28	41 => 'forty one',
	29	42 => 'fourty two',
	30	59 => 'fifty nine',
	31	100 => ['one hundred', 'a hundred', 'hundred a'],
	32	150 => ['one hundred and fifty', 'one fifty'],
	33	219 => ['two hundred and nineteen', 'two hundred nineteen', 'two nineteen'],
	34	200 => 'two-hundred',
	35	500 => '5 hundred',
	36	999 => 'nine hundred and ninety nine',
	37	1_000 => 'one thousand',
	38	1_200 => ['twelve hundred', 'one thousand two hundred'],
	39	17_000 => 'seventeen thousand',
	40	21_473 => 'twentyone-thousand-four-hundred-and-seventy-three',
	41	74_002 => 'seventy four thousand and two',
	42	99_999 => 'ninety nine thousand nine hundred ninety nine',
	43	100_000 => '100 thousand',
	44	250_000 => 'two hundred fifty thousand',
	45	1_000_000 => 'one million',
	46	1_250_007 => 'one million two hundred fifty thousand and seven',
	47	1_000_000_000 => 'one billion',
	48	1_000_000_001 => 'one billion and one'
	49	}
	50
	51	strings.sort.each do \|key, value\|
	52	Array(value).each do \|value\|
	53	assert_equal key, Numerizer.numerize(value).to_i
	54	end
	55	end
	56
	57	assert_equal "1/2", Numerizer.numerize("half")
	58	assert_equal "1/4", Numerizer.numerize("quarter")
	59	end
	60
	61	def test_combined_double_digets
	62	assert_equal "21", Numerizer.numerize("twentyone")
	63	assert_equal "37", Numerizer.numerize("thirtyseven")
	64	end
	65
	66	def test_fractions_in_words
	67	assert_equal "1/2", Numerizer.numerize("one half")
	68
	69	assert_equal "1/4", Numerizer.numerize("1 quarter")
	70	assert_equal "1/4", Numerizer.numerize("one quarter")
	71	assert_equal "1/4", Numerizer.numerize("a quarter")
	72	assert_equal "1/8", Numerizer.numerize("one eighth")
	73
	74	assert_equal "3/4", Numerizer.numerize("three quarters")
	75	assert_equal "2/4", Numerizer.numerize("two fourths")
	76	assert_equal "3/8", Numerizer.numerize("three eighths")
	77	assert_equal "7/10", Numerizer.numerize("seven tenths")
	78	end
	79
	80	def test_fractional_addition
	81	assert_equal "1.25", Numerizer.numerize("one and a quarter")
	82	assert_equal "2.375", Numerizer.numerize("two and three eighths")
	83	assert_equal "2.5", Numerizer.numerize("two and a half")
	84	assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
	85	end
	86
	87	def test_word_with_a_number
	88	assert_equal "pennyweight", Numerizer.numerize("pennyweight")
	89	end
	90
	91	def test_edges
	92	assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am")
	93	end
	94
	95	def test_multiple_slashes_should_not_be_evaluated
	96	assert_equal '11/02/2007', Numerizer.numerize('11/02/2007')
	97	end
	98
	99	def test_compatability
	100	assert_equal '1/2', Numerizer.numerize('1/2')
	101	assert_equal '05/06', Numerizer.numerize('05/06')
	102	assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
	103	assert_equal "1/2 an hour", Numerizer.numerize("half an hour")
	104	end
	105
	106	def test_ordinal_strings
	107	{
	108	'first' => '1st',
	109	'second' => '2nd',
	110	'third' => '3rd',
	111	'fourth' => '4th',
	112	'fifth' => '5th',
	113	'seventh' => '7th',
	114	'eighth' => '8th',
	115	'tenth' => '10th',
	116	'eleventh' => '11th',
	117	'twelfth' => '12th',
	118	'thirteenth' => '13th',
	119	'sixteenth' => '16th',
	120	'twentieth' => '20th',
	121	'twenty-third' => '23rd',
	122	'thirtieth' => '30th',
	123	'thirty-first' => '31st',
	124	'fourtieth' => '40th',
	125	'fourty ninth' => '49th',
	126	'fiftieth' => '50th',
	127	'sixtieth' => '60th',
	128	'seventieth' => '70th',
	129	'eightieth' => '80th',
	130	'ninetieth' => '90th',
	131	'hundredth' => '100th',
	132	'thousandth' => '1000th',
	133	'millionth' => '1000000th',
	134	'billionth' => '1000000000th',
	135	'trillionth' => '1000000000000th',
	136	'first day month two' => '1st day month 2'
	137	}.each do \|key, val\|
	138	assert_equal val, Numerizer.numerize(key)
	139	end
	140	end
	141
	142	def test_ambiguous_cases
	143	# Quarter ( Coin ) is Untested
	144	# Second ( Time / Verb ) is Untested
	145	assert_equal 'the 4th', Numerizer.numerize('the fourth')
	146	assert_equal '1/3 of', Numerizer.numerize('a third of')
	147	assert_equal '4th', Numerizer.numerize('fourth')
	148	assert_equal '2nd', Numerizer.numerize('second')
	149	assert_equal 'I quarter', Numerizer.numerize('I quarter')
	150	assert_equal 'You quarter', Numerizer.numerize('You quarter')
	151	assert_equal 'I want to quarter', Numerizer.numerize('I want to quarter')
	152	assert_equal 'the 1st 1/4', Numerizer.numerize('the first quarter')
	153	assert_equal '1/4 pound of beef', Numerizer.numerize('quarter pound of beef')
	154	assert_equal 'the 2nd second', Numerizer.numerize('the second second')
	155	assert_equal 'the 4th second', Numerizer.numerize('the fourth second')
	156	assert_equal '1 second', Numerizer.numerize('one second')
	157
	158	# TODO: Find way to distinguish this verb
	159	# assert_equal 'I peel and quarter bananas', Numerizer.numerize('I peel and quarter bananas')
	160	end
	161
	162	def test_ignore
	163	assert_equal 'the second day of march', Numerizer.numerize('the second day of march', ignore: ['second'])
	164	assert_equal 'quarter', Numerizer.numerize('quarter', ignore: ['quarter'])
	165	assert_equal 'the five guys', Numerizer.numerize('the five guys', ignore: ['five'])
	166	assert_equal 'the fifty 2', Numerizer.numerize('the fifty two', ignore: ['fifty'])
	167	end
	168
	169	def test_bias_ordinal
	170	assert_equal '4th', Numerizer.numerize('fourth', bias: :ordinal)
	171	assert_equal '12th', Numerizer.numerize('twelfth', bias: :ordinal)
	172	assert_equal '2nd', Numerizer.numerize('second', bias: :ordinal)
	173	assert_equal 'the 4th', Numerizer.numerize('the fourth', bias: :ordinal)
	174	assert_equal '2.75', Numerizer.numerize('two and three fourths', bias: :ordinal)
	175	assert_equal '3/5', Numerizer.numerize('three fifths', bias: :ordinal)
	176	assert_equal 'a 4th of', Numerizer.numerize('a fourth of', bias: :ordinal)
	177	assert_equal 'I quarter your home', Numerizer.numerize('I quarter your home', bias: :ordinal)
	178	assert_equal 'the 1st 2nd 3rd', Numerizer.numerize('the first second third', bias: :ordinal)
	179	end
	180
	181	def test_bias_fractional
	182	assert_equal '1/4', Numerizer.numerize('fourth', bias: :fractional)
	183	assert_equal '1/12', Numerizer.numerize('twelfth', bias: :fractional)
	184	assert_equal '2nd', Numerizer.numerize('second', bias: :fractional)
	185	assert_equal 'the 1/4', Numerizer.numerize('the fourth', bias: :fractional)
	186	assert_equal '2.75', Numerizer.numerize('two and three fourths', bias: :fractional)
	187	assert_equal '3/5', Numerizer.numerize('three fifths', bias: :fractional)
	188	assert_equal '1/4 of', Numerizer.numerize('a fourth of', bias: :fractional)
	189	assert_equal 'I 1/4 your home', Numerizer.numerize('I quarter your home', bias: :fractional)
	190	assert_equal 'the 1st second 1/3', Numerizer.numerize('the first second third', bias: :fractional)
	191	end
	192	end