From eeff3723ba428b5e9f74e4adf46619480635da0f Mon Sep 17 00:00:00 2001 From: Cameron Dutro Date: Wed, 29 Jul 2020 10:57:25 -0700 Subject: [PATCH 1/2] Choose a supported locale based on ancestry --- Rakefile | 5 + lib/twitter_cldr/resources.rb | 2 + .../resources/parent_locales_importer.rb | 54 ++++++ lib/twitter_cldr/shared/locale.rb | 58 +++++- resources/shared/parent_locales.yml | 172 ++++++++++++++++++ spec/shared/locale_spec.rb | 34 +++- 6 files changed, 320 insertions(+), 5 deletions(-) create mode 100644 lib/twitter_cldr/resources/parent_locales_importer.rb create mode 100644 resources/shared/parent_locales.yml diff --git a/Rakefile b/Rakefile index d40bc3088..8f878ae63 100644 --- a/Rakefile +++ b/Rakefile @@ -141,6 +141,11 @@ namespace :update do TwitterCldr::Resources::CasefolderClassGenerator.new.import end + desc 'Import parent locale data' + task :parent_locales do + TwitterCldr::Resources::ParentLocalesImporter.new.import + end + desc 'Import postal codes resource' task :postal_codes do TwitterCldr::Resources::PostalCodesImporter.new.import diff --git a/lib/twitter_cldr/resources.rb b/lib/twitter_cldr/resources.rb index 1d6081e7f..0509b71fd 100644 --- a/lib/twitter_cldr/resources.rb +++ b/lib/twitter_cldr/resources.rb @@ -20,6 +20,7 @@ module Resources autoload :Loader, 'twitter_cldr/resources/loader' autoload :LocalesResourcesImporter, 'twitter_cldr/resources/locales_resources_importer' autoload :NumberFormatsImporter, 'twitter_cldr/resources/number_formats_importer' + autoload :ParentLocalesImporter, 'twitter_cldr/resources/parent_locales_importer' autoload :PostalCodesImporter, 'twitter_cldr/resources/postal_codes_importer' autoload :Properties, 'twitter_cldr/resources/properties' autoload :SegmentDictionariesImporter, 'twitter_cldr/resources/segment_dictionaries_importer' @@ -59,6 +60,7 @@ def standard_importer_classes LanguageCodesImporter, LocalesResourcesImporter, NumberFormatsImporter, + ParentLocalesImporter, PostalCodesImporter, RbnfTestImporter, SegmentDictionariesImporter, diff --git a/lib/twitter_cldr/resources/parent_locales_importer.rb b/lib/twitter_cldr/resources/parent_locales_importer.rb new file mode 100644 index 000000000..8e09b049d --- /dev/null +++ b/lib/twitter_cldr/resources/parent_locales_importer.rb @@ -0,0 +1,54 @@ +# encoding: UTF-8 + +# Copyright 2012 Twitter, Inc +# http://www.apache.org/licenses/LICENSE-2.0 + +require 'nokogiri' + +module TwitterCldr + module Resources + + class ParentLocalesImporter < Importer + + requirement :cldr, Versions.cldr_version + output_path 'shared' + ruby_engine :mri + + private + + def execute + output_file = File.join(output_path, 'parent_locales.yml') + File.write(output_file, YAML.dump(parent_locales)) + end + + def parent_locales + @parent_locales ||= supplemental_data.xpath('//parentLocales/parentLocale').each_with_object({}) do |node, ret| + parent = node.attr('parent') + locales = node.attr('locales').split(' ') + + locales.each do |locale| + ret[locale] = parent + end + end + end + + def output_path + params.fetch(:output_path) + end + + def supplemental_data + @supplemental_data ||= Nokogiri.XML( + File.read( + File.join( + requirements[:cldr].common_path, + 'supplemental', + 'supplementalData.xml' + ) + ) + ) + end + + end + + end +end diff --git a/lib/twitter_cldr/shared/locale.rb b/lib/twitter_cldr/shared/locale.rb index fe6948436..f98c18840 100644 --- a/lib/twitter_cldr/shared/locale.rb +++ b/lib/twitter_cldr/shared/locale.rb @@ -3,6 +3,8 @@ # Copyright 2012 Twitter, Inc # http://www.apache.org/licenses/LICENSE-2.0 +require 'set' + module TwitterCldr module Shared class Locale @@ -209,6 +211,10 @@ def validity_resource @validity_resource ||= TwitterCldr.get_resource('shared', 'validity_data')[:validity_data] end + + def parent_locales + @parent_locales ||= TwitterCldr.get_resource('shared', 'parent_locales') + end end attr_accessor :language, :script, :region, :variants @@ -239,11 +245,9 @@ def max_supported def supported @supported ||= begin - found = permutations('-').find do |perm| - TwitterCldr.supported_locale?(perm) + ancestor_chain.sort.find do |loc| + TwitterCldr.supported_locale?(loc.dasherized) end - - self.class.new(found) if found end end @@ -273,6 +277,52 @@ def permutations(delimiter = '_') perms.uniq end + def ==(other) + language == other.language && + script == other.script && + region == other.region && + variants == other.variants + end + + alias eql? == + + def hash + to_a.hash + end + + def sort_key + k = 0 + k += 3 if language + k += 2 if script + k += 1 if region + k + end + + def <=>(other) + other.sort_key <=> sort_key + end + + def ancestor_chain + ancestry = [self] + remaining = [self] + + until remaining.empty? + locale = remaining.pop + + if parent = self.class.send(:parent_locales)[locale.to_s] + parent = self.class.parse(parent) + ancestry << parent + remaining << parent + else + parents = locale.permutations.map { |p| self.class.parse(p) } + remaining += parents - ancestry + ancestry += parents - ancestry + end + end + + ancestry + end + end end end diff --git a/resources/shared/parent_locales.yml b/resources/shared/parent_locales.yml new file mode 100644 index 000000000..6923ee138 --- /dev/null +++ b/resources/shared/parent_locales.yml @@ -0,0 +1,172 @@ +--- +az_Arab: root +az_Cyrl: root +blt_Latn: root +bm_Nkoo: root +bs_Cyrl: root +byn_Latn: root +cu_Glag: root +dje_Arab: root +dyo_Arab: root +en_Dsrt: root +en_Shaw: root +ff_Adlm: root +ff_Arab: root +ha_Arab: root +hi_Latn: root +iu_Latn: root +kk_Arab: root +ks_Deva: root +ku_Arab: root +ky_Arab: root +ky_Latn: root +ml_Arab: root +mn_Mong: root +mni_Mtei: root +ms_Arab: root +pa_Arab: root +sat_Deva: root +sd_Deva: root +sd_Khoj: root +sd_Sind: root +shi_Latn: root +so_Arab: root +sr_Latn: root +sw_Arab: root +tg_Arab: root +ug_Cyrl: root +uz_Arab: root +uz_Cyrl: root +vai_Latn: root +wo_Arab: root +yo_Arab: root +yue_Hans: root +zh_Hant: root +en_150: en_001 +en_AG: en_001 +en_AI: en_001 +en_AU: en_001 +en_BB: en_001 +en_BM: en_001 +en_BS: en_001 +en_BW: en_001 +en_BZ: en_001 +en_CA: en_001 +en_CC: en_001 +en_CK: en_001 +en_CM: en_001 +en_CX: en_001 +en_CY: en_001 +en_DG: en_001 +en_DM: en_001 +en_ER: en_001 +en_FJ: en_001 +en_FK: en_001 +en_FM: en_001 +en_GB: en_001 +en_GD: en_001 +en_GG: en_001 +en_GH: en_001 +en_GI: en_001 +en_GM: en_001 +en_GY: en_001 +en_HK: en_001 +en_IE: en_001 +en_IL: en_001 +en_IM: en_001 +en_IN: en_001 +en_IO: en_001 +en_JE: en_001 +en_JM: en_001 +en_KE: en_001 +en_KI: en_001 +en_KN: en_001 +en_KY: en_001 +en_LC: en_001 +en_LR: en_001 +en_LS: en_001 +en_MG: en_001 +en_MO: en_001 +en_MS: en_001 +en_MT: en_001 +en_MU: en_001 +en_MW: en_001 +en_MY: en_001 +en_NA: en_001 +en_NF: en_001 +en_NG: en_001 +en_NR: en_001 +en_NU: en_001 +en_NZ: en_001 +en_PG: en_001 +en_PH: en_001 +en_PK: en_001 +en_PN: en_001 +en_PW: en_001 +en_RW: en_001 +en_SB: en_001 +en_SC: en_001 +en_SD: en_001 +en_SG: en_001 +en_SH: en_001 +en_SL: en_001 +en_SS: en_001 +en_SX: en_001 +en_SZ: en_001 +en_TC: en_001 +en_TK: en_001 +en_TO: en_001 +en_TT: en_001 +en_TV: en_001 +en_TZ: en_001 +en_UG: en_001 +en_VC: en_001 +en_VG: en_001 +en_VU: en_001 +en_WS: en_001 +en_ZA: en_001 +en_ZM: en_001 +en_ZW: en_001 +en_AT: en_150 +en_BE: en_150 +en_CH: en_150 +en_DE: en_150 +en_DK: en_150 +en_FI: en_150 +en_NL: en_150 +en_SE: en_150 +en_SI: en_150 +es_AR: es_419 +es_BO: es_419 +es_BR: es_419 +es_BZ: es_419 +es_CL: es_419 +es_CO: es_419 +es_CR: es_419 +es_CU: es_419 +es_DO: es_419 +es_EC: es_419 +es_GT: es_419 +es_HN: es_419 +es_MX: es_419 +es_NI: es_419 +es_PA: es_419 +es_PE: es_419 +es_PR: es_419 +es_PY: es_419 +es_SV: es_419 +es_US: es_419 +es_UY: es_419 +es_VE: es_419 +pt_AO: pt_PT +pt_CH: pt_PT +pt_CV: pt_PT +pt_FR: pt_PT +pt_GQ: pt_PT +pt_GW: pt_PT +pt_LU: pt_PT +pt_MO: pt_PT +pt_MZ: pt_PT +pt_ST: pt_PT +pt_TL: pt_PT +zh_Hant_MO: zh_Hant_HK diff --git a/spec/shared/locale_spec.rb b/spec/shared/locale_spec.rb index 1dd08edb5..199a28239 100644 --- a/spec/shared/locale_spec.rb +++ b/spec/shared/locale_spec.rb @@ -136,7 +136,25 @@ end end - context 'with a locale instance' do + context 'with a locale with interesting ancestry' do + let(:locale) { described_class.new('es', nil, 'CR') } + + describe '#ancestor_chain' do + it 'identifies the correct ancestors' do + expect(locale.ancestor_chain.map(&:dasherized)).to eq( + ['es-CR', 'es-419', 'es'] + ) + end + end + + describe '#max_supported' do + it 'identifies the correct supported parent locale' do + expect(locale.max_supported.dasherized).to eq('es-419') + end + end + end + + context 'with a locale instance (Korean)' do let(:locale) { described_class.new('ko', nil, 'KR') } describe '#full_script' do @@ -212,5 +230,19 @@ ) end end + + describe '#ancestor_chain' do + it 'identifies the correct ancestors' do + expect(locale.ancestor_chain.map(&:dasherized)).to eq( + ['ko-KR', 'ko'] + ) + end + end + + describe '#max_supported' do + it 'identifies the correct supported locale' do + expect(locale.max_supported.dasherized).to eq('ko') + end + end end end From 996a16836b3697cfb0e6889a6d88240bf1dabc39 Mon Sep 17 00:00:00 2001 From: Cameron Dutro Date: Wed, 29 Jul 2020 10:58:28 -0700 Subject: [PATCH 2/2] No need to require set --- lib/twitter_cldr/shared/locale.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/twitter_cldr/shared/locale.rb b/lib/twitter_cldr/shared/locale.rb index f98c18840..289341780 100644 --- a/lib/twitter_cldr/shared/locale.rb +++ b/lib/twitter_cldr/shared/locale.rb @@ -3,8 +3,6 @@ # Copyright 2012 Twitter, Inc # http://www.apache.org/licenses/LICENSE-2.0 -require 'set' - module TwitterCldr module Shared class Locale