diff --git a/.travis.yml b/.travis.yml index a8f58e07d..0b07f05eb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,4 +16,6 @@ script: 'bundle exec rspec $RSPEC_OPTIONS' before_install: - gem update --system 2.1.11 - gem --version + - sudo apt-get update -qq + - sudo apt-get install -y libonig-dev before_script: 'gem install bundler' \ No newline at end of file diff --git a/Gemfile b/Gemfile index d5c342cac..cc04beb81 100644 --- a/Gemfile +++ b/Gemfile @@ -10,6 +10,10 @@ group :development, :test do if RUBY_VERSION >= "1.9" && RUBY_PLATFORM != "java" gem 'ruby-prof' end + + if RUBY_VERSION <= "1.8.7" + gem 'oniguruma' + end end group :development do diff --git a/README.md b/README.md index c79abb698..7fd2fccd4 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ TwitterCldr patches core Ruby objects like `Fixnum` and `Date` to make localizat # currencies, default USD 1337.localize(:es).to_currency.to_s # "1 337,00 $" -1337.localize(:es).to_currency.to_s(:currency => "EUR") # "1 337,00 €" +1337.localize(:es).to_currency.to_s(:currency => "EUR") # "1 337,00 \342\202\254" # percentages 1337.localize(:es).to_percent.to_s # "1 337%" @@ -71,7 +71,7 @@ If you're looking for a list of supported currencies, use the `TwitterCldr::Shar TwitterCldr::Shared::Currencies.currency_codes # ["ADP", "AED", "AFA", "AFN", ... 
] # data for a specific currency code -TwitterCldr::Shared::Currencies.for_code("CAD") # {:currency=>:CAD, :name=>"Canadian dollar", :cldr_symbol=>"CA$", :symbol=>"$", :code_points=>[36]} +TwitterCldr::Shared::Currencies.for_code("CAD") # {:code_points=>[36], :symbol=>"$", :cldr_symbol=>"CA$", :name=>"Canadian dollar", :currency=>:CAD} ``` #### Short / Long Decimals @@ -130,7 +130,7 @@ DateTime.now.localize(:ja).additional_formats You can use any of the returned formats as the argument to the `to_additional_s` method: ```ruby -# "14日金曜日" +# "14\346\227\245\351\207\221\346\233\234\346\227\245" DateTime.now.localize(:ja).to_additional_s("EEEEd") ``` @@ -140,38 +140,38 @@ It's important to know that, even though any given format may not be available a | Format | Output | |:-----------|------------------------| -| EHm | Fri 12:20 | +| y | 2014 | +| M | 2 | | EHms | Fri 12:20:05 | -| Ed | 14 Fri | -| Ehm | Fri 12:20 PM | -| Ehms | Fri 12:20:05 PM | -| Gy | 2014 CE | -| GyMMM | Feb 2014 CE | +| yM | 2/2014 | +| ms | 20:05 | | GyMMMEd | Fri, Feb 14, 2014 CE | -| GyMMMd | Feb 14, 2014 CE | -| H | 12 | -| Hm | 12:20 | -| Hms | 12:20:05 | -| M | 2 | +| yMMMd | Feb 14, 2014 | | MEd | Fri, 2/14 | +| Ed | 14 Fri | +| yMEd | Fri, 2/14/2014 | +| GyMMMd | Feb 14, 2014 CE | +| yMd | 2/14/2014 | | MMM | Feb | +| Ehm | Fri 12:20 PM | +| H | 12 | +| yQQQ | Q1 2014 | | MMMEd | Fri, Feb 14 | -| MMMd | Feb 14 | +| Ehms | Fri 12:20:05 PM | +| yMMM | Feb 2014 | | Md | 2/14 | -| d | 14 | -| h | 12 PM | +| Hm | 12:20 | +| yQQQQ | 1st quarter 2014 | | hm | 12:20 PM | -| hms | 12:20:05 PM | -| ms | 20:05 | -| y | 2014 | -| yM | 2/2014 | -| yMEd | Fri, 2/14/2014 | -| yMMM | Feb 2014 | +| h | 12 PM | +| Gy | 2014 CE | | yMMMEd | Fri, Feb 14, 2014 | -| yMMMd | Feb 14, 2014 | -| yMd | 2/14/2014 | -| yQQQ | Q1 2014 | -| yQQQQ | 1st quarter 2014 | +| d | 14 | +| Hms | 12:20:05 | +| EHm | Fri 12:20 | +| hms | 12:20:05 PM | +| MMMd | Feb 14 | +| GyMMM | Feb 2014 CE | @@ -340,7 +340,7 @@ When you pass 
a Hash as an argument and specify placeholders with `%d`, Twi You can use the localize convenience method on language code symbols to get their equivalents in another language: ```ruby -:es.localize(:es).as_language_code # "español" +:es.localize(:es).as_language_code # "espa\303\261ol" :ru.localize(:es).as_language_code # "ruso" ``` @@ -355,10 +355,10 @@ In addition to translating language codes, TwitterCLDR provides access to the fu ```ruby # get all languages for the default locale -TwitterCldr::Shared::Languages.all # { ... :vi => "Vietnamese", :"zh-Hant" => "Traditional Chinese" ... } +TwitterCldr::Shared::Languages.all # { ... :"zh-Hant" => "Traditional Chinese", :vi => "Vietnamese" ... } # get all languages for a specific locale -TwitterCldr::Shared::Languages.all_for(:es) # { ... :vi => "vietnamita", :"zh-Hant" => "chino tradicional" ... } +TwitterCldr::Shared::Languages.all_for(:es) # { ... :"zh-Hant" => "chino tradicional", :vi => "vietnamita" ... } # get a language by its code for the default locale TwitterCldr::Shared::Languages.from_code(:'zh-Hant') # "Traditional Chinese" @@ -438,14 +438,14 @@ TwitterCldr::Shared::LanguageCodes.convert(:es, :from => :bcp_47, :to => :iso_63 Use the `standards_for` method to get the standards that are available for conversion from a given code. In the example below, note that the first argument, `:es`, is the correct BCP-47 language code for Spanish, which is the second argument. 
The return value comprises all the available conversions: ```ruby -# [:bcp_47, :iso_639_1, :iso_639_2, :iso_639_3] +# [:iso_639_1, :iso_639_3, :bcp_47, :iso_639_2] TwitterCldr::Shared::LanguageCodes.standards_for(:es, :bcp_47) ``` Get a list of supported standards for a full English language name: ```ruby -# [:bcp_47, :iso_639_1, :iso_639_2, :iso_639_3] +# [:iso_639_1, :iso_639_3, :bcp_47, :iso_639_2] TwitterCldr::Shared::LanguageCodes.standards_for_language(:Spanish) ``` @@ -507,13 +507,13 @@ TwitterCldr::Utils::CodePoints.from_string("¿") # [191] Convert code points to characters: ```ruby -TwitterCldr::Utils::CodePoints.to_string([0xBF]) # "¿" +TwitterCldr::Utils::CodePoints.to_string([0xBF]) # "\302\277" ``` Normalize/decompose a Unicode string (NFD, NFKD, NFC, and NFKC implementations available). Note that the normalized string will almost always look the same as the original string because most character display systems automatically combine decomposed characters. ```ruby -TwitterCldr::Normalization::NFD.normalize("français") # "français" +TwitterCldr::Normalization::NFD.normalize("français") # "fran\303\247ais" ``` Normalization is easier to see in hex: @@ -550,8 +550,8 @@ Specify a specific normalization algorithm via the `:using` option. NFD, NFKD, TwitterCLDR contains an implementation of the [Unicode Collation Algorithm (UCA)](http://unicode.org/reports/tr10/) that provides language-sensitive text sorting capabilities. Conveniently, all you have to do is use the `sort` method in combination with the familiar `localize` method. 
Notice the difference between the default Ruby sort, which simply compares bytes, and the proper language-aware sort from TwitterCLDR in this German example: ```ruby -["Art", "Wasa", "Älg", "Ved"].sort # ["Art", "Ved", "Wasa", "Älg"] -["Art", "Wasa", "Älg", "Ved"].localize(:de).sort.to_a # ["Älg", "Art", "Ved", "Wasa"] +["Art", "Wasa", "Älg", "Ved"].sort # ["Art", "Ved", "Wasa", "\303\204lg"] +["Art", "Wasa", "Älg", "Ved"].localize(:de).sort.to_a # ["\303\204lg", "Art", "Ved", "Wasa"] ``` Behind the scenes, these convenience methods are creating instances of `LocalizedArray`, then using the `TwitterCldr::Collation::Collator` class to sort the elements: @@ -559,8 +559,8 @@ Behind the scenes, these convenience methods are creating instances of `Localize ```ruby collator = TwitterCldr::Collation::Collator.new(:de) -collator.sort(["Art", "Wasa", "Älg", "Ved"]) # ["Älg", "Art", "Ved", "Wasa"] -collator.sort!(["Art", "Wasa", "Älg", "Ved"]) # ["Älg", "Art", "Ved", "Wasa"] +collator.sort(["Art", "Wasa", "Älg", "Ved"]) # ["\303\204lg", "Art", "Ved", "Wasa"] +collator.sort!(["Art", "Wasa", "Älg", "Ved"]) # ["\303\204lg", "Art", "Ved", "Wasa"] ``` The `TwitterCldr::Collation::Collator` class also provides methods to compare two strings, get sort keys, and calculate collation elements for individual strings: diff --git a/README.md.erb b/README.md.erb index 7f05c6f98..d54da67f8 100644 --- a/README.md.erb +++ b/README.md.erb @@ -1,3 +1,5 @@ +<% $KCODE = "UTF-8" if RUBY_VERSION <= "1.8.7" %> + ## twitter-cldr-rb [![Build Status](https://secure.travis-ci.org/twitter/twitter-cldr-rb.png?branch=master)](http://travis-ci.org/twitter/twitter-cldr-rb) [![Code Climate](https://codeclimate.com/github/twitter/twitter-cldr-rb.png)](https://codeclimate.com/github/twitter/twitter-cldr-rb) TwitterCldr uses Unicode's Common Locale Data Repository (CLDR) to format certain types of text into their diff --git a/lib/twitter_cldr/formatters/numbers/abbreviated/abbreviated_number_formatter.rb 
b/lib/twitter_cldr/formatters/numbers/abbreviated/abbreviated_number_formatter.rb index e04e67e66..45c4d5705 100644 --- a/lib/twitter_cldr/formatters/numbers/abbreviated/abbreviated_number_formatter.rb +++ b/lib/twitter_cldr/formatters/numbers/abbreviated/abbreviated_number_formatter.rb @@ -10,7 +10,10 @@ class AbbreviatedNumberFormatter < NumberFormatter protected def transform_number(number) - if number < NumberDataReader::NUMBER_MAX && number >= NumberDataReader::NUMBER_MIN + within = number < TwitterCldr::DataReaders::NumberDataReader::NUMBER_MAX && + number >= TwitterCldr::DataReaders::NumberDataReader::NUMBER_MIN + + if within power = (((number.to_s.length - 1) / 3) * 3).floor factor = (10 ** power).to_f number / factor diff --git a/lib/twitter_cldr/parsers/unicode_regex/character_class.rb b/lib/twitter_cldr/parsers/unicode_regex/character_class.rb index 636bd6859..ff98b0d58 100644 --- a/lib/twitter_cldr/parsers/unicode_regex/character_class.rb +++ b/lib/twitter_cldr/parsers/unicode_regex/character_class.rb @@ -59,7 +59,7 @@ def evaluate(node) when UnaryOperator, BinaryOperator case node.operator when :negate - UnicodeRegex.valid_regexp_chars.subtract( + TwitterCldr::Shared::UnicodeRegex.valid_regexp_chars.subtract( evaluate(node.child) ) when :union, :pipe diff --git a/lib/twitter_cldr/shared/break_iterator.rb b/lib/twitter_cldr/shared/break_iterator.rb index 1b6625d4e..b50d09b44 100644 --- a/lib/twitter_cldr/shared/break_iterator.rb +++ b/lib/twitter_cldr/shared/break_iterator.rb @@ -120,7 +120,7 @@ def self.rule_cache end def symbol_table_for(boundary_data) - table = SymbolTable.new + table = TwitterCldr::Parsers::SymbolTable.new boundary_data[:variables].each do |variable| id = variable[:id].to_s tokens = segmentation_tokenizer.tokenize(variable[:value]) diff --git a/lib/twitter_cldr/shared/unicode_regex.rb b/lib/twitter_cldr/shared/unicode_regex.rb index a34923b13..6d400954c 100644 --- a/lib/twitter_cldr/shared/unicode_regex.rb +++ 
b/lib/twitter_cldr/shared/unicode_regex.rb @@ -61,7 +61,17 @@ def initialize(elements, modifiers = nil) end def to_regexp - @regexp ||= Regexp.new(to_regexp_str, modifiers) + # Compiles this Unicode regex once and memoizes the result in @regexp. + # Ruby 1.8 compiles with Oniguruma::ORegexp (required there, per the error below); later Rubies use the built-in Regexp. + # The gem is probed with defined? so that a NameError raised while building the pattern is not misreported as a missing gem. + @regexp ||= if RUBY_VERSION <= "1.8.7" + unless defined?(Oniguruma::ORegexp) + raise "Unicode regular expressions require the Oniguruma gem when using Ruby 1.8. Please install, require, and retry." + end + Oniguruma::ORegexp.new(to_regexp_str, modifiers) + else + Regexp.new(to_regexp_str, modifiers) + end end def to_regexp_str diff --git a/lib/twitter_cldr/utils/code_points.rb b/lib/twitter_cldr/utils/code_points.rb index c6a0d52a0..2f2e9ae4a 100644 --- a/lib/twitter_cldr/utils/code_points.rb +++ b/lib/twitter_cldr/utils/code_points.rb @@ -26,7 +26,7 @@ def to_chars(code_points) end def from_string(str) - from_chars(str.chars.to_a) + str.unpack("U*") end def to_string(code_points) diff --git a/spec/formatters/numbers/abbreviated/long_decimal_formatter_spec.rb b/spec/formatters/numbers/abbreviated/long_decimal_formatter_spec.rb index 6e94df116..f057f6dd1 100644 --- a/spec/formatters/numbers/abbreviated/long_decimal_formatter_spec.rb +++ b/spec/formatters/numbers/abbreviated/long_decimal_formatter_spec.rb @@ -8,7 +8,10 @@ include TwitterCldr::Formatters describe LongDecimalFormatter do - let(:data_reader) { NumberDataReader.new(:en, :type => :long_decimal) } + let(:data_reader) do + TwitterCldr::DataReaders::NumberDataReader.new(:en, :type => :long_decimal) + end + let(:formatter) { data_reader.formatter } let(:tokenizer) { data_reader.tokenizer } diff --git a/spec/formatters/numbers/abbreviated/short_decimal_formatter_spec.rb b/spec/formatters/numbers/abbreviated/short_decimal_formatter_spec.rb index 9e6632ea7..1d18a7287 100644 --- a/spec/formatters/numbers/abbreviated/short_decimal_formatter_spec.rb +++ b/spec/formatters/numbers/abbreviated/short_decimal_formatter_spec.rb @@ -8,7 +8,10 @@ include TwitterCldr::Formatters describe ShortDecimalFormatter do - let(:data_reader) { NumberDataReader.new(:en, 
:type => :short_decimal) } + let(:data_reader) do + TwitterCldr::DataReaders::NumberDataReader.new(:en, :type => :short_decimal) + end + let(:formatter) { data_reader.formatter } let(:tokenizer) { data_reader.tokenizer } diff --git a/spec/parsers/symbol_table_spec.rb b/spec/parsers/symbol_table_spec.rb index cfd1f5087..27f930523 100644 --- a/spec/parsers/symbol_table_spec.rb +++ b/spec/parsers/symbol_table_spec.rb @@ -13,7 +13,13 @@ describe "#fetch" do it "should be able to retrieve values for symbols" do table.fetch(:a).should == "b" - lambda { table.fetch(:z) }.should raise_error(KeyError) + fetch = lambda { table.fetch(:z) } + + if RUBY_VERSION > "1.8.7" + fetch.should raise_error(KeyError) + else + fetch.should raise_error(IndexError) + end end end diff --git a/spec/shared/unicode_regex_spec.rb b/spec/shared/unicode_regex_spec.rb index d4228aae9..5f6e417c9 100644 --- a/spec/shared/unicode_regex_spec.rb +++ b/spec/shared/unicode_regex_spec.rb @@ -37,7 +37,11 @@ def compile(str, symbol_table = nil) describe "#to_regexp" do it "should return a ruby Regexp" do - regex.to_regexp.should be_a(Regexp) + if RUBY_VERSION <= "1.8.7" + regex.to_regexp.should be_a(Oniguruma::ORegexp) + else + regex.to_regexp.should be_a(Regexp) + end end it "should properly turn various basic regexes into strings" do diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 04d0b0bef..08184d9c4 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -8,6 +8,10 @@ require 'twitter_cldr' require 'pry-nav' +if RUBY_VERSION <= "1.8.7" + require 'oniguruma' +end + if ENV['SCOV'] require 'simplecov' SimpleCov.start