Skip to content

Commit

Permalink
Merge branch 'master' of github.com:twitter/twitter-cldr-rb
Browse files Browse the repository at this point in the history
  • Loading branch information
Cameron Dutro committed Feb 4, 2013
2 parents 62c122a + dfee953 commit f0f8550
Show file tree
Hide file tree
Showing 6 changed files with 360 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/twitter_cldr.rb
Expand Up @@ -23,6 +23,7 @@ module TwitterCldr
autoload :Collation, 'twitter_cldr/collation'
autoload :Localized, 'twitter_cldr/localized'
autoload :Normalization, 'twitter_cldr/normalization'
autoload :Parsers, 'twitter_cldr/parsers'
autoload :Resources, 'twitter_cldr/resources'
autoload :Shared, 'twitter_cldr/shared'
autoload :Tokenizers, 'twitter_cldr/tokenizers'
Expand Down
14 changes: 14 additions & 0 deletions lib/twitter_cldr/localized/localized_string.rb
Expand Up @@ -36,6 +36,20 @@ def to_s
@base_obj.dup
end

# Parses the wrapped string as a number (see #to_f) and drops any fractional
# part.
#
# @param options [Hash] forwarded unchanged to #to_f
# @return [Integer] the result of calling +to_i+ on whatever #to_f returned
def to_i(options = {})
  parsed = to_f(options)
  parsed.to_i
end

# Parses the wrapped string as a number written in this object's locale.
#
# Strings made up solely of digits and the parser's default separator
# characters are handed to TwitterCldr::Parsers::NumberParser; anything else,
# or anything the parser rejects, falls back to String#to_f on the raw string.
#
# @param options [Hash] parser options forwarded to NumberParser#try_parse
#   (e.g. +:strict+)
# @return [Numeric] the parser's result when parsing succeeds (an Integer when
#   the text has no decimal part), otherwise the Float from String#to_f
def to_f(options = {})
  parser_class = TwitterCldr::Parsers::NumberParser
  return @base_obj.to_f unless parser_class.is_numeric?(@base_obj)

  # try_parse is (text, default, options): the default slot must be filled
  # explicitly, otherwise the options hash is taken for the default value and
  # never reaches the parser.
  parser_class.new(@locale).try_parse(@base_obj, nil, options || {}) do |result|
    result || @base_obj.to_f
  end
end

# Number of code points in the wrapped string, as reported by +code_points+.
#
# @return [Integer] whatever +code_points.size+ returns
def size
  points = code_points
  points.size
end
Expand Down
10 changes: 10 additions & 0 deletions lib/twitter_cldr/parsers.rb
@@ -0,0 +1,10 @@
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

# Namespace for the library's text parsers.
module TwitterCldr
module Parsers
# Registered with autoload, so the file is only required when the
# NumberParser constant is first referenced.
autoload :NumberParser, 'twitter_cldr/parsers/number_parser'
end
end
113 changes: 113 additions & 0 deletions lib/twitter_cldr/parsers/number_parser.rb
@@ -0,0 +1,113 @@
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

module TwitterCldr
  module Parsers

    # Raised by NumberParser#parse when the text is not a well-formed number.
    class InvalidNumberError < StandardError; end

    # Turns number strings written with a locale's group and decimal symbols
    # (e.g. "1.337,05" for :es) into Integer or Float values.
    class NumberParser

      # Regexp character-class fragment with every separator accepted in
      # non-strict mode: period, comma and whitespace. Frozen because it is
      # shared by every parser instance.
      SEPARATOR_CHARS = ['.', ',', ' '].map do |char|
        char == ' ' ? '\s' : Regexp.escape(char)
      end.join.freeze

      # @param locale [Symbol] locale whose number symbols drive strict parsing
      def initialize(locale = TwitterCldr.locale)
        @locale = locale
      end

      # Parses +number_text+ into a number.
      #
      # @param number_text [String] text to parse
      # @param options [Hash] +:strict+ (default true) restricts separators to
      #   the locale's own group and decimal symbols; when false, any of
      #   SEPARATOR_CHARS is accepted as a separator. The hash is not modified.
      # @return [Integer, Float] Integer when the text has no decimal
      #   separator, Float otherwise
      # @raise [InvalidNumberError] when no tokens are found, when the text
      #   does not end in digits, or when a decimal separator is not the last
      #   separator
      #
      # NOTE(review): characters that are neither digits nor separators are
      # skipped by the tokenizer rather than rejected, so "1abc2" parses as 12.
      def parse(number_text, options = {})
        # fetch instead of writing the default back, so the caller's hash is
        # left untouched
        strict = options.fetch(:strict, true)
        group, decimal = separators(strict)
        tokens = tokenize(number_text, group, decimal)

        num_list, punct_list = tokens.partition { |t| t[:type] == :numeric }

        unless punct_valid?(punct_list)
          raise InvalidNumberError, "misplaced separator in #{number_text.inspect}"
        end

        unless tokens.last && tokens.last[:type] == :numeric
          raise InvalidNumberError, "#{number_text.inspect} does not end with a digit"
        end

        if punct_list.last && punct_list.last[:type] == :decimal
          integer_part = num_list[0..-2].map { |num| num[:value] }.join.to_i
          # Convert from the decimal string so the value is rounded once,
          # instead of adding a separately rounded fraction to the integer.
          "#{integer_part}.#{num_list.last[:value]}".to_f
        else
          num_list.map { |num| num[:value] }.join.to_i
        end
      end

      # Like #parse, but never raises InvalidNumberError.
      #
      # With a block, yields the parsed value (nil on failure) and returns the
      # block's value; +default+ is ignored in that case. Without a block,
      # returns the parsed value or +default+ on failure.
      #
      # @param number_text [String] text to parse
      # @param default [Object] value returned when parsing fails and no block
      #   is given
      # @param options [Hash] forwarded to #parse
      def try_parse(number_text, default = nil, options = {})
        result = begin
          parse(number_text, options)
        rescue InvalidNumberError
          nil
        end

        if block_given?
          yield(result)
        else
          result || default
        end
      end

      # @param number_text [String] text to check
      # @param options [Hash] forwarded to #parse
      # @return [Boolean] true when #parse succeeds. Any StandardError, not
      #   just InvalidNumberError, is reported as false.
      def valid?(number_text, options = {})
        parse(number_text, options)
        true
      rescue
        false
      end

      # Tells whether +text+ consists solely of ASCII digits and the given
      # separator characters.
      #
      # @param text [String] text to check
      # @param separators [String] regexp character-class fragment of extra
      #   characters to allow; pass "" to allow digits only
      # @return [Boolean] false for empty text
      def self.is_numeric?(text, separators = SEPARATOR_CHARS)
        # \z (not \Z) so a trailing newline is only accepted when the
        # separators themselves allow whitespace
        !!(text =~ /\A[0-9#{separators}]+\z/)
      end

      protected

      # Punctuation is valid when every separator is a group separator, except
      # that the last one may be a decimal separator.
      def punct_valid?(punct_list)
        last_index = punct_list.size - 1
        punct_list.each_with_index.all? do |punct, index|
          punct[:type] == :group || (index == last_index && punct[:type] == :decimal)
        end
      end

      # Returns the [group, decimal] character-class fragments to tokenize
      # with: the locale's symbols in strict mode, SEPARATOR_CHARS for both
      # otherwise.
      def separators(strict = false)
        group = strict ? group_separator : SEPARATOR_CHARS
        decimal = strict ? decimal_separator : SEPARATOR_CHARS
        [group, decimal]
      end

      # Splits +number_text+ into digit runs and single separator characters,
      # each tagged by #identify. Characters matching neither are dropped.
      def tokenize(number_text, group, decimal)
        pattern = /(\d*)([#{group}]?)(\d*)([#{decimal}]?)(\d*)/
        pieces = number_text.scan(pattern).flatten.reject(&:empty?)
        pieces.map { |piece| identify(piece, group, decimal) }
      end

      # Tags +text+ as :numeric, :group or :decimal (nil when it is none of
      # these). Group is tested before decimal, so when both fragments are the
      # same (non-strict mode) every separator is classified as :group.
      def identify(text, group, decimal)
        type = if self.class.is_numeric?(text, "")
          :numeric
        elsif text =~ /[#{group}]/
          :group
        elsif text =~ /[#{decimal}]/
          :decimal
        end

        { :value => text, :type => type }
      end

      # Regexp-escaped decimal symbol of the locale.
      def decimal_separator
        @decimal_separator ||= Regexp.escape(resource[:symbols][:decimal])
      end

      # Regexp-escaped group symbol of the locale.
      def group_separator
        @group_separator ||= Regexp.escape(resource[:symbols][:group])
      end

      # The :numbers section of the locale's "numbers" resource, memoized.
      def resource
        @resource ||= TwitterCldr.get_locale_resource(@locale, "numbers")[@locale][:numbers]
      end

    end
  end
end
33 changes: 33 additions & 0 deletions spec/localized/localized_string_spec.rb
Expand Up @@ -90,6 +90,39 @@
end
end

# Covers LocalizedString#to_f. Each separator case is checked twice: once with
# the default locale (presumably :en - confirm against the spec helper) and
# once with :es, where the roles of "." and "," are swapped.
describe "#to_f" do
it "should correctly parse a number with a thousands separator" do
"1,300".localize.to_f.should == 1300.0
"1.300".localize(:es).to_f.should == 1300.0
end

it "should correctly parse a number with a decimal separator" do
"1.300".localize.to_f.should == 1.3
"1,300".localize(:es).to_f.should == 1.3
end

it "should correctly parse a number with a thousands and a decimal separator" do
"1,300.05".localize.to_f.should == 1300.05
"1.300,05".localize(:es).to_f.should == 1300.05
end

# Strings containing letters fail the numeric pre-check in #to_f, so these
# two examples exercise the plain String#to_f fallback.
it "should return zero if the string contains no numbers" do
"abc".localize.to_f.should == 0.0
end

it "should return only the numbers at the beginning of the string if the string contains any non-numeric characters" do
"1abc".localize.to_f.should == 1.0
"a1bc".localize.to_f.should == 0.0
end
end

# Covers LocalizedString#to_i, which converts the result of #to_f to an
# integer; checked with default-locale and :es formatting.
describe "#to_i" do
it "should chop off the decimal" do
"1,300.05".localize.to_i.should == 1300
"1.300,05".localize(:es).to_i.should == 1300
end
end

describe "#normalize" do
let(:string) { 'string' }
let(:normalized_string) { 'normalized' }
Expand Down
189 changes: 189 additions & 0 deletions spec/parsers/number_parser_spec.rb
@@ -0,0 +1,189 @@
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

require 'spec_helper'
require 'pry'
require 'pry-nav'

include TwitterCldr::Parsers

describe TwitterCldr::Parsers::NumberParser do
let(:separators) { ["\\.", ","] }

before(:each) do
@parser = NumberParser.new(:es)
end

# group_separator is a protected method, hence `send`. The expected value is
# the regexp-escaped form of the group symbol for the :es parser.
describe "#group_separator" do
it "returns the correct group separator" do
@parser.send(:group_separator).should == "\\."
end
end

# decimal_separator is a protected method, hence `send`. A comma needs no
# regexp escaping, so the expected value is the bare symbol.
describe "#decimal_separator" do
it "returns the correct decimal separator" do
@parser.send(:decimal_separator).should == ","
end
end

# identify(text, group, decimal) tags one token. `separators` is splatted as
# the group ("\\.") and decimal (",") arguments, in that order.
describe "#identify" do
it "properly identifies a numeric value" do
@parser.send(:identify, "7841", *separators).should == { :value => "7841", :type => :numeric }
end

it "properly identifies a decimal separator" do
@parser.send(:identify, ",", *separators).should == { :value => ",", :type => :decimal }
end

it "properly identifies a group separator" do
@parser.send(:identify, ".", *separators).should == { :value => ".", :type => :group }
end

# The token hash is still returned for unrecognized text; only its :type is nil.
it "returns nil if the text doesn't match a number or either separators" do
@parser.send(:identify, "abc", *separators).should == { :value => "abc", :type => nil }
end
end

# tokenize only splits and tags the text; it does not validate separator
# order. With the separators used here "," is the decimal and "." the group,
# so "1,33.00" yields a decimal token followed by a group token.
describe "#tokenize" do
it "splits text by numericality and group/decimal separators" do
@parser.send(:tokenize, "1,33.00", *separators).should == [
{ :value => "1", :type => :numeric },
{ :value => ",", :type => :decimal },
{ :value => "33", :type => :numeric },
{ :value => ".", :type => :group },
{ :value => "00", :type => :numeric }
]
end

it "returns an empty array for a non-numeric string" do
@parser.send(:tokenize, "abc", *separators).should be_empty
end
end

# separators(strict) returns the [group, decimal] regexp character-class
# fragments the tokenizer will use.
describe "#separators" do
# Non-strict mode uses the same catch-all fragment for both roles.
it "returns all separators when strict mode is off" do
group, decimal = @parser.send(:separators, false)
group.should == '\.,\s'
decimal.should == '\.,\s'
end

it "returns only locale-specific separators when strict mode is on" do
group, decimal = @parser.send(:separators, true)
group.should == '\.'
decimal.should == ','
end
end

# punct_valid? receives only the separator tokens, so each example tokenizes
# the text and rejects the numeric tokens before calling it.
describe "#punct_valid" do
it "correctly validates a number with no decimal" do
tokens = @parser.send(:tokenize, "1.337", *separators).reject { |t| t[:type] == :numeric }
@parser.send(:punct_valid?, tokens).should be_true
end

it "correctly validates a number with a decimal" do
tokens = @parser.send(:tokenize, "1.337,00", *separators).reject { |t| t[:type] == :numeric }
@parser.send(:punct_valid?, tokens).should be_true
end

# Two decimal separators: the first one is not in last position, so the list is invalid.
it "reports on an invalid number when it has more than one decimal" do
tokens = @parser.send(:tokenize, "1,337,00", *separators).reject { |t| t[:type] == :numeric }
@parser.send(:punct_valid?, tokens).should be_false
end
end

# NumberParser.is_numeric?(text, separators) is a class method. Passing "" as
# the second argument restricts the check to digits only.
describe "#is_numeric?" do
it "returns true if the text is numeric" do
NumberParser.is_numeric?("4839", "").should be_true
NumberParser.is_numeric?("1", "").should be_true
end

it "returns false if the text is not purely numeric" do
NumberParser.is_numeric?("abc", "").should be_false
NumberParser.is_numeric?("123abc", "").should be_false
end

it "returns false if the text is blank" do
NumberParser.is_numeric?("", "").should be_false
end

# The second argument is interpolated into a character class, so any
# character listed there counts as "numeric".
it "accepts the given characters as valid numerics" do
NumberParser.is_numeric?("a123a", "a").should be_true
NumberParser.is_numeric?("1.234,56").should be_true # default separator chars used here
end
end

# valid? is called without options, so parsing is strict and uses the :es
# separators ("." group, "," decimal).
describe "#valid?" do
it "correctly identifies a series of valid cases" do
["5", "5,0", "1.337", "1.337,0", "0,05", ",5", "1.337.000,00"].each do |num|
@parser.valid?(num).should be_true
end
end

# Invalid because of a repeated decimal separator or a trailing separator.
it "correctly identifies a series of invalid cases" do
["12,0,0", "5,", "5."].each do |num|
@parser.valid?(num).should be_false
end
end
end

# Mirrors the #valid? cases, but asserts the parsed values (strict mode, :es
# separators) and the error raised for malformed text.
describe "#parse" do
it "correctly parses a series of valid numbers" do
cases = {
"5" => 5,
"5,0" => 5.0,
"1.337" => 1337,
"1.337,0" => 1337.0,
"0,05" => 0.05,
",5" => 0.5,
"1.337.000,00" => 1337000.0
}

cases.each do |text, expected|
@parser.parse(text).should == expected
end
end

it "correctly raises an error when asked to parse invalid numbers" do
cases = ["12,0,0", "5,", "5."]
cases.each do |text|
lambda { @parser.parse(text) }.should raise_error(InvalidNumberError)
end
end

# A space is not an :es separator; it is only accepted with :strict => false.
context "non-strict" do
it "succeeds in parsing even if inexact punctuation is used" do
@parser.parse("5 100", :strict => false).should == 5100
end
end
end

# try_parse(text, default = nil, options = {}) wraps #parse and turns an
# InvalidNumberError into nil / the default / the block's value.
describe "#try_parse" do
it "parses correctly with a valid number" do
@parser.try_parse("1.234").should == 1234
end

# With a block, the parsed value is yielded and the block's value is returned.
it "parses correctly with a valid number and yields to the given block" do
pre_result = nil
@parser.try_parse("1.234") do |result|
pre_result = result
9
end.should == 9
pre_result.should == 1234
end

it "falls back on the default value if the number is invalid" do
@parser.try_parse("5,").should be_nil
@parser.try_parse("5,", 0).should == 0
end

it "falls back on the block if the number is invalid" do
@parser.try_parse("5,") { |result| 9 }.should == 9
end

# A non-string argument fails inside the parser with NoMethodError, which
# try_parse must let propagate.
it "doesn't catch anything but an InvalidNumberError" do
lambda { @parser.try_parse(Object.new) }.should raise_error(NoMethodError)
end
end
end

0 comments on commit f0f8550

Please sign in to comment.