Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Number parsing finished!

  • Loading branch information...
commit c928736b8b5222dfe008356f9662f141c16d50cd 1 parent ad8562a
Cameron Dutro authored
View
1  lib/twitter_cldr.rb
@@ -23,6 +23,7 @@ module TwitterCldr
autoload :Collation, 'twitter_cldr/collation'
autoload :Localized, 'twitter_cldr/localized'
autoload :Normalization, 'twitter_cldr/normalization'
+ autoload :Parsers, 'twitter_cldr/parsers'
autoload :Resources, 'twitter_cldr/resources'
autoload :Shared, 'twitter_cldr/shared'
autoload :Tokenizers, 'twitter_cldr/tokenizers'
View
14 lib/twitter_cldr/localized/localized_string.rb
@@ -36,6 +36,20 @@ def to_s
@base_obj.dup
end
+ def to_i(options = {})
+ to_f(options).to_i
+ end
+
+ def to_f(options = {})
+ if TwitterCldr::Parsers::NumberParser.is_numeric?(@base_obj)
+ TwitterCldr::Parsers::NumberParser.new(@locale).parse(@base_obj, options)
+ else
+ @base_obj.to_f
+ end
+ rescue
+ @base_obj.to_f
+ end
+
def size
code_points.size
end
View
10 lib/twitter_cldr/parsers.rb
@@ -0,0 +1,10 @@
+# encoding: UTF-8
+
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+
+module TwitterCldr
+ # Namespace for TwitterCldr's parser classes.
+ module Parsers
+ # Registered with autoload, so the file is only required the first time
+ # TwitterCldr::Parsers::NumberParser is referenced.
+ autoload :NumberParser, 'twitter_cldr/parsers/number_parser'
+ end
+end
View
113 lib/twitter_cldr/parsers/number_parser.rb
@@ -0,0 +1,113 @@
+# encoding: UTF-8
+
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+
+module TwitterCldr
+ module Parsers
+
+ class InvalidNumberError < StandardError; end
+
+ class NumberParser
+
+ SEPARATOR_CHARS = ['.', ',', ' '].map do |char|
+ char == ' ' ? '\s' : Regexp.escape(char)
+ end.join
+
+ def initialize(locale = TwitterCldr.locale)
+ @locale = locale
+ end
+
+ def parse(number_text, options = {})
+ options[:strict] = true unless options.include?(:strict)
+ group, decimal = separators(options[:strict])
+ tokens = tokenize(number_text, group, decimal)
+
+ num_list, punct_list = tokens.partition { |t| t[:type] == :numeric }
+ raise InvalidNumberError unless punct_valid?(punct_list)
+ raise InvalidNumberError unless tokens.last && tokens.last[:type] == :numeric
+
+ if punct_list.last && punct_list.last[:type] == :decimal
+ result = num_list[0..-2].map { |num| num[:value] }.join.to_i
+ result + num_list.last[:value].to_i / (10.0 ** num_list.last[:value].size)
+ else
+ num_list.map { |num| num[:value] }.join.to_i
+ end
+ end
+
+ def try_parse(number_text, default = nil, options = {})
+ begin
+ result = parse(number_text, options)
+ rescue InvalidNumberError
+ result = nil
+ end
+
+ if block_given?
+ yield(result)
+ else
+ result || default
+ end
+ end
+
+ def valid?(number_text, options = {})
+ parse(number_text, options)
+ true
+ rescue
+ false
+ end
+
+ def self.is_numeric?(text, separators = SEPARATOR_CHARS)
+ !!(text =~ /\A[0-9#{separators}]+\Z/)
+ end
+
+ protected
+
+ def punct_valid?(punct_list)
+ # all group, allowed one decimal at end
+ punct_list.each_with_index.all? do |punct, index|
+ punct[:type] == :group || (index == (punct_list.size - 1) && punct[:type] == :decimal)
+ end
+ end
+
+ def separators(strict = false)
+ group = strict ? group_separator : SEPARATOR_CHARS
+ decimal = strict ? decimal_separator : SEPARATOR_CHARS
+ [group, decimal]
+ end
+
+ def tokenize(number_text, group, decimal)
+ match_data = number_text.scan(/([\d]*)([#{group}]{0,1})([\d]*)([#{decimal}]{0,1})([\d]*)/)
+ (match_data.flatten || []).reject(&:empty?).map { |match| identify(match, group, decimal) }
+ end
+
+ def identify(text, group, decimal)
+ result = { :value => text }
+ result[:type] = if self.class.is_numeric?(result[:value], "")
+ :numeric
+ else
+ if result[:value] =~ /[#{group}]/
+ :group
+ elsif result[:value] =~ /[#{decimal}]/
+ :decimal
+ else
+ nil
+ end
+ end
+ result
+ end
+
+ def decimal_separator
+ @decimal_separator ||= Regexp.escape(resource[:symbols][:decimal])
+ end
+
+ def group_separator
+ @group_separator ||= Regexp.escape(resource[:symbols][:group])
+ end
+
+ def resource
+ @resource ||= TwitterCldr.get_locale_resource(@locale, "numbers")[@locale][:numbers]
+ end
+
+ end
+ end
+end
View
33 spec/localized/localized_string_spec.rb
@@ -90,6 +90,39 @@
end
end
+ # Covers LocalizedString#to_f. The same digits are read differently per
+ # locale: the default locale (presumably :en, confirm in spec_helper) is
+ # expected to treat "," as the group separator and "." as the decimal
+ # separator, while :es is expected to do the opposite.
+ describe "#to_f" do
+ it "should correctly parse a number with a thousands separator" do
+ "1,300".localize.to_f.should == 1300.0
+ "1.300".localize(:es).to_f.should == 1300.0
+ end
+
+ it "should correctly parse a number with a decimal separator" do
+ "1.300".localize.to_f.should == 1.3
+ "1,300".localize(:es).to_f.should == 1.3
+ end
+
+ it "should correctly parse a number with a thousands and a decimal separator" do
+ "1,300.05".localize.to_f.should == 1300.05
+ "1.300,05".localize(:es).to_f.should == 1300.05
+ end
+
+ # Strings containing anything besides digits and separators are expected
+ # to behave like Ruby's own String#to_f.
+ it "should return zero if the string contains no numbers" do
+ "abc".localize.to_f.should == 0.0
+ end
+
+ it "should return only the numbers at the beginning of the string if the string contains any non-numeric characters" do
+ "1abc".localize.to_f.should == 1.0
+ "a1bc".localize.to_f.should == 0.0
+ end
+ end
+
+ # Covers LocalizedString#to_i: the locale-aware parse result is expected
+ # to lose its fractional part.
+ describe "#to_i" do
+ it "should chop off the decimal" do
+ "1,300.05".localize.to_i.should == 1300
+ "1.300,05".localize(:es).to_i.should == 1300
+ end
+ end
+
describe "#normalize" do
let(:string) { 'string' }
let(:normalized_string) { 'normalized' }
View
189 spec/parsers/number_parser_spec.rb
@@ -0,0 +1,189 @@
+# encoding: UTF-8
+
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+
+require 'spec_helper'
+require 'pry'
+require 'pry-nav'
+
+include TwitterCldr::Parsers
+
+describe TwitterCldr::Parsers::NumberParser do
+ let(:separators) { ["\\.", ","] }
+
+ before(:each) do
+ @parser = NumberParser.new(:es)
+ end
+
+ describe "#group_separator" do
+ it "returns the correct group separator" do
+ @parser.send(:group_separator).should == "\\."
+ end
+ end
+
+ describe "#decimal_separator" do
+ it "returns the correct decimal separator" do
+ @parser.send(:decimal_separator).should == ","
+ end
+ end
+
+ describe "#identify" do
+ it "properly identifies a numeric value" do
+ @parser.send(:identify, "7841", *separators).should == { :value => "7841", :type => :numeric }
+ end
+
+ it "properly identifies a decimal separator" do
+ @parser.send(:identify, ",", *separators).should == { :value => ",", :type => :decimal }
+ end
+
+ it "properly identifies a group separator" do
+ @parser.send(:identify, ".", *separators).should == { :value => ".", :type => :group }
+ end
+
+ it "returns nil if the text doesn't match a number or either separators" do
+ @parser.send(:identify, "abc", *separators).should == { :value => "abc", :type => nil }
+ end
+ end
+
+ describe "#tokenize" do
+ it "splits text by numericality and group/decimal separators" do
+ @parser.send(:tokenize, "1,33.00", *separators).should == [
+ { :value => "1", :type => :numeric },
+ { :value => ",", :type => :decimal },
+ { :value => "33", :type => :numeric },
+ { :value => ".", :type => :group },
+ { :value => "00", :type => :numeric }
+ ]
+ end
+
+ it "returns an empty array for a non-numeric string" do
+ @parser.send(:tokenize, "abc", *separators).should be_empty
+ end
+ end
+
+ describe "#separators" do
+ it "returns all separators when strict mode is off" do
+ group, decimal = @parser.send(:separators, false)
+ group.should == '\.,\s'
+ decimal.should == '\.,\s'
+ end
+
+ it "returns only locale-specific separators when strict mode is on" do
+ group, decimal = @parser.send(:separators, true)
+ group.should == '\.'
+ decimal.should == ','
+ end
+ end
+
+ describe "#punct_valid" do
+ it "correctly validates a number with no decimal" do
+ tokens = @parser.send(:tokenize, "1.337", *separators).reject { |t| t[:type] == :numeric }
+ @parser.send(:punct_valid?, tokens).should be_true
+ end
+
+ it "correctly validates a number with a decimal" do
+ tokens = @parser.send(:tokenize, "1.337,00", *separators).reject { |t| t[:type] == :numeric }
+ @parser.send(:punct_valid?, tokens).should be_true
+ end
+
+ it "reports on an invalid number when it has more than one decimal" do
+ tokens = @parser.send(:tokenize, "1,337,00", *separators).reject { |t| t[:type] == :numeric }
+ @parser.send(:punct_valid?, tokens).should be_false
+ end
+ end
+
+ describe "#is_numeric?" do
+ it "returns true if the text is numeric" do
+ NumberParser.is_numeric?("4839", "").should be_true
+ NumberParser.is_numeric?("1", "").should be_true
+ end
+
+ it "returns false if the text is not purely numeric" do
+ NumberParser.is_numeric?("abc", "").should be_false
+ NumberParser.is_numeric?("123abc", "").should be_false
+ end
+
+ it "returns false if the text is blank" do
+ NumberParser.is_numeric?("", "").should be_false
+ end
+
+ it "accepts the given characters as valid numerics" do
+ NumberParser.is_numeric?("a123a", "a").should be_true
+ NumberParser.is_numeric?("1.234,56").should be_true # default separator chars used here
+ end
+ end
+
+ describe "#valid?" do
+ it "correctly identifies a series of valid cases" do
+ ["5", "5,0", "1.337", "1.337,0", "0,05", ",5", "1.337.000,00"].each do |num|
+ @parser.valid?(num).should be_true
+ end
+ end
+
+ it "correctly identifies a series of invalid cases" do
+ ["12,0,0", "5,", "5."].each do |num|
+ @parser.valid?(num).should be_false
+ end
+ end
+ end
+
+ describe "#parse" do
+ it "correctly parses a series of valid numbers" do
+ cases = {
+ "5" => 5,
+ "5,0" => 5.0,
+ "1.337" => 1337,
+ "1.337,0" => 1337.0,
+ "0,05" => 0.05,
+ ",5" => 0.5,
+ "1.337.000,00" => 1337000.0
+ }
+
+ cases.each do |text, expected|
+ @parser.parse(text).should == expected
+ end
+ end
+
+ it "correctly raises an error when asked to parse invalid numbers" do
+ cases = ["12,0,0", "5,", "5."]
+ cases.each do |text|
+ lambda { @parser.parse(text) }.should raise_error(InvalidNumberError)
+ end
+ end
+
+ context "non-strict" do
+ it "succeeds in parsing even if inexact punctuation is used" do
+ @parser.parse("5 100", :strict => false).should == 5100
+ end
+ end
+ end
+
+ describe "#try_parse" do
+ it "parses correctly with a valid number" do
+ @parser.try_parse("1.234").should == 1234
+ end
+
+ it "parses correctly with a valid number and yields to the given block" do
+ pre_result = nil
+ @parser.try_parse("1.234") do |result|
+ pre_result = result
+ 9
+ end.should == 9
+ pre_result.should == 1234
+ end
+
+ it "falls back on the default value if the number is invalid" do
+ @parser.try_parse("5,").should be_nil
+ @parser.try_parse("5,", 0).should == 0
+ end
+
+ it "falls back on the block if the number is invalid" do
+ @parser.try_parse("5,") { |result| 9 }.should == 9
+ end
+
+ it "doesn't catch anything but an InvalidNumberError" do
+ lambda { @parser.try_parse(Object.new) }.should raise_error(NoMethodError)
+ end
+ end
+end
Please sign in to comment.
Something went wrong with that request. Please try again.