Merge branch 'master' of github.com:twitter/twitter-cldr-rb

twitter · Feb 4, 2013 · f0f8550 · f0f8550
2 parents 62c122a + dfee953
commit f0f8550
Show file tree

Hide file tree

Showing 6 changed files with 360 additions and 0 deletions.
diff --git a/lib/twitter_cldr.rb b/lib/twitter_cldr.rb
@@ -23,6 +23,7 @@ module TwitterCldr
   autoload :Collation,     'twitter_cldr/collation'
   autoload :Localized,     'twitter_cldr/localized'
   autoload :Normalization, 'twitter_cldr/normalization'
+  autoload :Parsers,       'twitter_cldr/parsers'
   autoload :Resources,     'twitter_cldr/resources'
   autoload :Shared,        'twitter_cldr/shared'
   autoload :Tokenizers,    'twitter_cldr/tokenizers'

diff --git a/lib/twitter_cldr/localized/localized_string.rb b/lib/twitter_cldr/localized/localized_string.rb
@@ -36,6 +36,20 @@ def to_s
         @base_obj.dup
       end
 
+      def to_i(options = {})
+        to_f(options).to_i
+      end
+
+      def to_f(options = {})
+        if TwitterCldr::Parsers::NumberParser.is_numeric?(@base_obj)
+          TwitterCldr::Parsers::NumberParser.new(@locale).try_parse(@base_obj, options) do |result|
+            result || @base_obj.to_f
+          end
+        else
+          @base_obj.to_f
+        end
+      end
+
       # Returns the size of #code_points for this string.
       def size
         code_points.size
       end

diff --git a/lib/twitter_cldr/parsers.rb b/lib/twitter_cldr/parsers.rb
@@ -0,0 +1,10 @@
+# encoding: UTF-8
+
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+
+module TwitterCldr
+  module Parsers  # namespace for the library's parser classes
+    autoload :NumberParser, 'twitter_cldr/parsers/number_parser'  # file is required on first reference to the constant
+  end
+end
diff --git a/lib/twitter_cldr/parsers/number_parser.rb b/lib/twitter_cldr/parsers/number_parser.rb
@@ -0,0 +1,113 @@
+# encoding: UTF-8
+
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+
+module TwitterCldr
+  module Parsers
+
+    class InvalidNumberError < StandardError; end  # raised by NumberParser#parse when the text is not a well-formed number
+
+    class NumberParser
+
+      SEPARATOR_CHARS = ['.', ',', ' '].map do |char|
+        char == ' ' ? '\s' : Regexp.escape(char)
+      end.join
+
+      def initialize(locale = TwitterCldr.locale)
+        @locale = locale
+      end
+
+      def parse(number_text, options = {})
+        options[:strict] = true unless options.include?(:strict)
+        group, decimal = separators(options[:strict])
+        tokens = tokenize(number_text, group, decimal)
+
+        num_list, punct_list = tokens.partition { |t| t[:type] == :numeric }
+        raise InvalidNumberError unless punct_valid?(punct_list)
+        raise InvalidNumberError unless tokens.last && tokens.last[:type] == :numeric
+
+        if punct_list.last && punct_list.last[:type] == :decimal
+          result = num_list[0..-2].map { |num| num[:value] }.join.to_i
+          result + num_list.last[:value].to_i / (10.0 ** num_list.last[:value].size)
+        else
+          num_list.map { |num| num[:value] }.join.to_i
+        end
+      end
+
+      def try_parse(number_text, default = nil, options = {})
+        begin
+          result = parse(number_text, options)
+        rescue InvalidNumberError
+          result = nil
+        end
+
+        if block_given?
+          yield(result)
+        else
+          result || default
+        end
+      end
+
+      def valid?(number_text, options = {})
+        parse(number_text, options)
+        true
+      rescue
+        false
+      end
+
+      def self.is_numeric?(text, separators = SEPARATOR_CHARS)
+        !!(text =~ /\A[0-9#{separators}]+\Z/)
+      end
+
+      protected
+
+      def punct_valid?(punct_list)
+        # all group, allowed one decimal at end
+        punct_list.each_with_index.all? do |punct, index|
+          punct[:type] == :group || (index == (punct_list.size - 1) && punct[:type] == :decimal)
+        end
+      end
+
+      def separators(strict = false)
+        group = strict ? group_separator : SEPARATOR_CHARS
+        decimal = strict ? decimal_separator : SEPARATOR_CHARS
+        [group, decimal]
+      end
+
+      def tokenize(number_text, group, decimal)
+        match_data = number_text.scan(/([\d]*)([#{group}]{0,1})([\d]*)([#{decimal}]{0,1})([\d]*)/)
+        (match_data.flatten || []).reject(&:empty?).map { |match| identify(match, group, decimal) }
+      end
+
+      def identify(text, group, decimal)
+        result = { :value => text }
+        result[:type] = if self.class.is_numeric?(result[:value], "")
+          :numeric
+        else
+          if result[:value] =~ /[#{group}]/
+            :group
+          elsif result[:value] =~ /[#{decimal}]/
+            :decimal
+          else
+            nil
+          end
+        end
+        result
+      end
+
+      def decimal_separator
+        @decimal_separator ||= Regexp.escape(resource[:symbols][:decimal])
+      end
+
+      def group_separator
+        @group_separator ||= Regexp.escape(resource[:symbols][:group])
+      end
+
+      def resource
+        @resource ||= TwitterCldr.get_locale_resource(@locale, "numbers")[@locale][:numbers]
+      end
+
+    end
+  end
+end
diff --git a/spec/localized/localized_string_spec.rb b/spec/localized/localized_string_spec.rb
@@ -90,6 +90,39 @@
     end
   end
 
+  describe "#to_f" do  # examples without a locale argument rely on the default locale; :es swaps the roles of "." and ","
+    it "should correctly parse a number with a thousands separator" do
+      "1,300".localize.to_f.should == 1300.0
+      "1.300".localize(:es).to_f.should == 1300.0
+    end
+
+    it "should correctly parse a number with a decimal separator" do
+      "1.300".localize.to_f.should == 1.3
+      "1,300".localize(:es).to_f.should == 1.3
+    end
+
+    it "should correctly parse a number with a thousands and a decimal separator" do
+      "1,300.05".localize.to_f.should == 1300.05
+      "1.300,05".localize(:es).to_f.should == 1300.05
+    end
+
+    it "should return zero if the string contains no numbers" do
+      "abc".localize.to_f.should == 0.0  # not numeric, so #to_f falls back to String#to_f
+    end
+
+    it "should return only the numbers at the beginning of the string if the string contains any non-numeric characters" do
+      "1abc".localize.to_f.should == 1.0
+      "a1bc".localize.to_f.should == 0.0
+    end
+  end
+
+  describe "#to_i" do  # #to_i is #to_f followed by a conversion with to_i
+    it "should chop off the decimal" do
+      "1,300.05".localize.to_i.should == 1300
+      "1.300,05".localize(:es).to_i.should == 1300
+    end
+  end
+
   describe "#normalize" do
     let(:string) { 'string' }
     let(:normalized_string) { 'normalized' }

diff --git a/spec/parsers/number_parser_spec.rb b/spec/parsers/number_parser_spec.rb
@@ -0,0 +1,189 @@
+# encoding: UTF-8
+
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+
+require 'spec_helper'
+require 'pry'
+require 'pry-nav'
+
+include TwitterCldr::Parsers
+
+describe TwitterCldr::Parsers::NumberParser do  # all examples use a parser for the :es locale
+  let(:separators) { ["\\.", ","] }  # regexp-escaped [group, decimal] pair, splatted into the protected helpers
+
+  before(:each) do
+    @parser = NumberParser.new(:es)
+  end
+
+  describe "#group_separator" do
+    it "returns the correct group separator" do
+      @parser.send(:group_separator).should == "\\."
+    end
+  end
+
+  describe "#decimal_separator" do
+    it "returns the correct decimal separator" do
+      @parser.send(:decimal_separator).should == ","
+    end
+  end
+
+  describe "#identify" do
+    it "properly identifies a numeric value" do
+      @parser.send(:identify, "7841", *separators).should == { :value => "7841", :type => :numeric }
+    end
+
+    it "properly identifies a decimal separator" do
+      @parser.send(:identify, ",", *separators).should == { :value => ",", :type => :decimal }
+    end
+
+    it "properly identifies a group separator" do
+      @parser.send(:identify, ".", *separators).should == { :value => ".", :type => :group }
+    end
+
+    it "returns nil if the text doesn't match a number or either separators" do
+      @parser.send(:identify, "abc", *separators).should == { :value => "abc", :type => nil }
+    end
+  end
+
+  describe "#tokenize" do
+    it "splits text by numericality and group/decimal separators" do
+      @parser.send(:tokenize, "1,33.00", *separators).should == [  # tokenize only classifies; ordering rules are checked by punct_valid?
+        { :value => "1",  :type => :numeric },
+        { :value => ",",  :type => :decimal },
+        { :value => "33", :type => :numeric },
+        { :value => ".",  :type => :group },
+        { :value => "00", :type => :numeric }
+      ]
+    end
+
+    it "returns an empty array for a non-numeric string" do
+      @parser.send(:tokenize, "abc", *separators).should be_empty
+    end
+  end
+
+  describe "#separators" do
+    it "returns all separators when strict mode is off" do
+      group, decimal = @parser.send(:separators, false)
+      group.should == '\.,\s'
+      decimal.should == '\.,\s'
+    end
+
+    it "returns only locale-specific separators when strict mode is on" do
+      group, decimal = @parser.send(:separators, true)
+      group.should == '\.'
+      decimal.should == ','
+    end
+  end
+
+  describe "#punct_valid" do
+    it "correctly validates a number with no decimal" do
+      tokens = @parser.send(:tokenize, "1.337", *separators).reject { |t| t[:type] == :numeric }  # keep punctuation tokens only
+      @parser.send(:punct_valid?, tokens).should be_true
+    end
+
+    it "correctly validates a number with a decimal" do
+      tokens = @parser.send(:tokenize, "1.337,00", *separators).reject { |t| t[:type] == :numeric }
+      @parser.send(:punct_valid?, tokens).should be_true
+    end
+
+    it "reports on an invalid number when it has more than one decimal" do
+      tokens = @parser.send(:tokenize, "1,337,00", *separators).reject { |t| t[:type] == :numeric }
+      @parser.send(:punct_valid?, tokens).should be_false
+    end
+  end
+
+  describe "#is_numeric?" do
+    it "returns true if the text is numeric" do
+      NumberParser.is_numeric?("4839", "").should be_true  # "" means no separator characters are allowed
+      NumberParser.is_numeric?("1", "").should be_true
+    end
+
+    it "returns false if the text is not purely numeric" do
+      NumberParser.is_numeric?("abc", "").should be_false
+      NumberParser.is_numeric?("123abc", "").should be_false
+    end
+
+    it "returns false if the text is blank" do
+      NumberParser.is_numeric?("", "").should be_false
+    end
+
+    it "accepts the given characters as valid numerics" do
+      NumberParser.is_numeric?("a123a", "a").should be_true
+      NumberParser.is_numeric?("1.234,56").should be_true  # default separator chars used here
+    end
+  end
+
+  describe "#valid?" do
+    it "correctly identifies a series of valid cases" do
+      ["5", "5,0", "1.337", "1.337,0", "0,05", ",5", "1.337.000,00"].each do |num|
+        @parser.valid?(num).should be_true
+      end
+    end
+
+    it "correctly identifies a series of invalid cases" do
+      ["12,0,0", "5,", "5."].each do |num|
+        @parser.valid?(num).should be_false
+      end
+    end
+  end
+
+  describe "#parse" do
+    it "correctly parses a series of valid numbers" do
+      cases = {  # input text => expected value under :es
+        "5" => 5,
+        "5,0" => 5.0,
+        "1.337" => 1337,
+        "1.337,0" => 1337.0,
+        "0,05" => 0.05,
+        ",5" => 0.5,
+        "1.337.000,00" => 1337000.0
+      }
+
+      cases.each do |text, expected|
+        @parser.parse(text).should == expected
+      end
+    end
+
+    it "correctly raises an error when asked to parse invalid numbers" do
+      cases = ["12,0,0", "5,", "5."]
+      cases.each do |text|
+        lambda { @parser.parse(text) }.should raise_error(InvalidNumberError)
+      end
+    end
+
+    context "non-strict" do
+      it "succeeds in parsing even if inexact punctuation is used" do
+        @parser.parse("5 100", :strict => false).should == 5100
+      end
+    end
+  end
+
+  describe "#try_parse" do
+    it "parses correctly with a valid number" do
+      @parser.try_parse("1.234").should == 1234
+    end
+
+    it "parses correctly with a valid number and yields to the given block" do
+      pre_result = nil
+      @parser.try_parse("1.234") do |result|
+        pre_result = result
+        9  # the block's value becomes try_parse's return value
+      end.should == 9
+      pre_result.should == 1234
+    end
+
+    it "falls back on the default value if the number is invalid" do
+      @parser.try_parse("5,").should be_nil
+      @parser.try_parse("5,", 0).should == 0
+    end
+
+    it "falls back on the block if the number is invalid" do
+      @parser.try_parse("5,") { |result| 9 }.should == 9
+    end
+
+    it "doesn't catch anything but an InvalidNumberError" do
+      lambda { @parser.try_parse(Object.new) }.should raise_error(NoMethodError)  # a plain Object has no #scan for tokenize to call
+    end
+  end
+end