Skip to content

Commit

Permalink
Choose a supported locale based on ancestry (#237)
Browse files Browse the repository at this point in the history
  • Loading branch information
camertron committed Aug 9, 2020
2 parents c79c8da + 996a168 commit 4f3053a
Show file tree
Hide file tree
Showing 6 changed files with 318 additions and 5 deletions.
5 changes: 5 additions & 0 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ namespace :update do
TwitterCldr::Resources::CasefolderClassGenerator.new.import
end

desc 'Import parent locale data'
task :parent_locales do
  # Build the importer first, then run it.
  importer = TwitterCldr::Resources::ParentLocalesImporter.new
  importer.import
end

desc 'Import postal codes resource'
task :postal_codes do
TwitterCldr::Resources::PostalCodesImporter.new.import
Expand Down
2 changes: 2 additions & 0 deletions lib/twitter_cldr/resources.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ module Resources
autoload :Loader, 'twitter_cldr/resources/loader'
autoload :LocalesResourcesImporter, 'twitter_cldr/resources/locales_resources_importer'
autoload :NumberFormatsImporter, 'twitter_cldr/resources/number_formats_importer'
autoload :ParentLocalesImporter, 'twitter_cldr/resources/parent_locales_importer'
autoload :PostalCodesImporter, 'twitter_cldr/resources/postal_codes_importer'
autoload :Properties, 'twitter_cldr/resources/properties'
autoload :SegmentDictionariesImporter, 'twitter_cldr/resources/segment_dictionaries_importer'
Expand Down Expand Up @@ -59,6 +60,7 @@ def standard_importer_classes
LanguageCodesImporter,
LocalesResourcesImporter,
NumberFormatsImporter,
ParentLocalesImporter,
PostalCodesImporter,
RbnfTestImporter,
SegmentDictionariesImporter,
Expand Down
54 changes: 54 additions & 0 deletions lib/twitter_cldr/resources/parent_locales_importer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

require 'nokogiri'

module TwitterCldr
  module Resources

    # Reads the parentLocale elements from CLDR's supplementalData.xml and
    # writes them out as a flat locale => parent YAML mapping.
    class ParentLocalesImporter < Importer

      requirement :cldr, Versions.cldr_version
      output_path 'shared'
      ruby_engine :mri

      private

      # Dumps the locale => parent mapping to parent_locales.yml inside the
      # configured output directory.
      def execute
        destination = File.join(output_path, 'parent_locales.yml')
        File.write(destination, YAML.dump(parent_locales))
      end

      # Builds (and memoizes) a hash that maps every locale listed in a
      # parentLocale element's "locales" attribute to that element's "parent"
      # attribute. A locale listed more than once keeps the last parent seen.
      def parent_locales
        @parent_locales ||= begin
          mapping = {}

          supplemental_data.xpath('//parentLocales/parentLocale').each do |node|
            parent = node.attr('parent')

            # "locales" holds several locale codes separated by whitespace.
            node.attr('locales').split(' ').each do |locale|
              mapping[locale] = parent
            end
          end

          mapping
        end
      end

      # Directory the YAML file is written to, taken from the importer params.
      def output_path
        params.fetch(:output_path)
      end

      # Parsed supplementalData.xml document, located under the CLDR
      # requirement's common path. Parsed once and memoized.
      def supplemental_data
        @supplemental_data ||= begin
          xml_path = File.join(
            requirements[:cldr].common_path,
            'supplemental',
            'supplementalData.xml'
          )

          Nokogiri.XML(File.read(xml_path))
        end
      end

    end

  end
end
56 changes: 52 additions & 4 deletions lib/twitter_cldr/shared/locale.rb
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ def validity_resource
@validity_resource ||=
TwitterCldr.get_resource('shared', 'validity_data')[:validity_data]
end

# Returns the 'parent_locales' resource from the 'shared' resource group,
# fetching it on first use and reusing the cached value afterwards.
def parent_locales
  @parent_locales || (
    @parent_locales = TwitterCldr.get_resource('shared', 'parent_locales')
  )
end
end

attr_accessor :language, :script, :region, :variants
Expand Down Expand Up @@ -239,11 +243,9 @@ def max_supported

# Finds the most specific locale in this locale's ancestor chain that
# TwitterCldr supports. The result is memoized.
#
# Candidates are ranked by descending sort_key. Array#sort is not guaranteed
# to be stable, so ties are broken explicitly by position in ancestor_chain;
# this keeps a locale ahead of an ancestor that has the same sort_key.
#
# @return [Locale, nil] the first supported candidate, or nil if none is
def supported
  @supported ||= begin
    ranked = ancestor_chain.sort_by.with_index do |loc, position|
      [-loc.sort_key, position]
    end

    ranked.find { |loc| TwitterCldr.supported_locale?(loc.dasherized) }
  end
end

Expand Down Expand Up @@ -273,6 +275,52 @@ def permutations(delimiter = '_')
perms.uniq
end

# Two locales are equal when their language, script, region and variants
# all match.
#
# Anything that is not an instance of this class compares as unequal,
# instead of raising NoMethodError when it lacks the locale accessors
# (e.g. nil or a String).
#
# @param other [Object] the object to compare against
# @return [Boolean]
def ==(other)
  return false unless other.is_a?(self.class)

  language == other.language &&
    script == other.script &&
    region == other.region &&
    variants == other.variants
end

alias eql? ==

# Hash code computed from this locale's array form (to_a), so locales can be
# used as Hash keys and in array set operations.
def hash
  components = to_a
  components.hash
end

# Integer score of how many components this locale carries: language adds 3,
# script adds 2 and region adds 1. Components that are nil or false add
# nothing.
#
# @return [Integer] a value between 0 and 6
def sort_key
  weighted = [[language, 3], [script, 2], [region, 1]]

  weighted.inject(0) do |total, (component, weight)|
    component ? total + weight : total
  end
end

# Orders locales by descending sort_key, so a locale with a higher sort_key
# sorts before one with a lower sort_key.
#
# @param other [#sort_key] the locale to compare against
# @return [Integer, nil] result of comparing other's sort_key with this one's
def <=>(other)
  theirs = other.sort_key
  theirs <=> sort_key
end

# Collects this locale and every locale reachable from it by walking
# upwards, starting with the receiver itself.
#
# For each locale visited: if the parent_locales resource names an explicit
# parent for its string form, that parent is the only candidate; otherwise
# the candidates are the locale's own permutations. Candidates already in
# the chain are skipped in both cases, so the result has no duplicates and
# the walk terminates even if the parent data contains a cycle.
#
# @return [Array<Locale>] the receiver followed by its ancestors, in
#   discovery order
def ancestor_chain
  ancestry = [self]
  remaining = [self]

  until remaining.empty?
    locale = remaining.pop
    # parent_locales is a private class-level helper, hence the send.
    explicit_parent = self.class.send(:parent_locales)[locale.to_s]

    candidates =
      if explicit_parent
        [self.class.parse(explicit_parent)]
      else
        locale.permutations.map { |perm| self.class.parse(perm) }
      end

    unseen = candidates - ancestry
    remaining.concat(unseen)
    ancestry.concat(unseen)
  end

  ancestry
end

end
end
end
172 changes: 172 additions & 0 deletions resources/shared/parent_locales.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
---
az_Arab: root
az_Cyrl: root
blt_Latn: root
bm_Nkoo: root
bs_Cyrl: root
byn_Latn: root
cu_Glag: root
dje_Arab: root
dyo_Arab: root
en_Dsrt: root
en_Shaw: root
ff_Adlm: root
ff_Arab: root
ha_Arab: root
hi_Latn: root
iu_Latn: root
kk_Arab: root
ks_Deva: root
ku_Arab: root
ky_Arab: root
ky_Latn: root
ml_Arab: root
mn_Mong: root
mni_Mtei: root
ms_Arab: root
pa_Arab: root
sat_Deva: root
sd_Deva: root
sd_Khoj: root
sd_Sind: root
shi_Latn: root
so_Arab: root
sr_Latn: root
sw_Arab: root
tg_Arab: root
ug_Cyrl: root
uz_Arab: root
uz_Cyrl: root
vai_Latn: root
wo_Arab: root
yo_Arab: root
yue_Hans: root
zh_Hant: root
en_150: en_001
en_AG: en_001
en_AI: en_001
en_AU: en_001
en_BB: en_001
en_BM: en_001
en_BS: en_001
en_BW: en_001
en_BZ: en_001
en_CA: en_001
en_CC: en_001
en_CK: en_001
en_CM: en_001
en_CX: en_001
en_CY: en_001
en_DG: en_001
en_DM: en_001
en_ER: en_001
en_FJ: en_001
en_FK: en_001
en_FM: en_001
en_GB: en_001
en_GD: en_001
en_GG: en_001
en_GH: en_001
en_GI: en_001
en_GM: en_001
en_GY: en_001
en_HK: en_001
en_IE: en_001
en_IL: en_001
en_IM: en_001
en_IN: en_001
en_IO: en_001
en_JE: en_001
en_JM: en_001
en_KE: en_001
en_KI: en_001
en_KN: en_001
en_KY: en_001
en_LC: en_001
en_LR: en_001
en_LS: en_001
en_MG: en_001
en_MO: en_001
en_MS: en_001
en_MT: en_001
en_MU: en_001
en_MW: en_001
en_MY: en_001
en_NA: en_001
en_NF: en_001
en_NG: en_001
en_NR: en_001
en_NU: en_001
en_NZ: en_001
en_PG: en_001
en_PH: en_001
en_PK: en_001
en_PN: en_001
en_PW: en_001
en_RW: en_001
en_SB: en_001
en_SC: en_001
en_SD: en_001
en_SG: en_001
en_SH: en_001
en_SL: en_001
en_SS: en_001
en_SX: en_001
en_SZ: en_001
en_TC: en_001
en_TK: en_001
en_TO: en_001
en_TT: en_001
en_TV: en_001
en_TZ: en_001
en_UG: en_001
en_VC: en_001
en_VG: en_001
en_VU: en_001
en_WS: en_001
en_ZA: en_001
en_ZM: en_001
en_ZW: en_001
en_AT: en_150
en_BE: en_150
en_CH: en_150
en_DE: en_150
en_DK: en_150
en_FI: en_150
en_NL: en_150
en_SE: en_150
en_SI: en_150
es_AR: es_419
es_BO: es_419
es_BR: es_419
es_BZ: es_419
es_CL: es_419
es_CO: es_419
es_CR: es_419
es_CU: es_419
es_DO: es_419
es_EC: es_419
es_GT: es_419
es_HN: es_419
es_MX: es_419
es_NI: es_419
es_PA: es_419
es_PE: es_419
es_PR: es_419
es_PY: es_419
es_SV: es_419
es_US: es_419
es_UY: es_419
es_VE: es_419
pt_AO: pt_PT
pt_CH: pt_PT
pt_CV: pt_PT
pt_FR: pt_PT
pt_GQ: pt_PT
pt_GW: pt_PT
pt_LU: pt_PT
pt_MO: pt_PT
pt_MZ: pt_PT
pt_ST: pt_PT
pt_TL: pt_PT
zh_Hant_MO: zh_Hant_HK
34 changes: 33 additions & 1 deletion spec/shared/locale_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,25 @@
end
end

context 'with a locale instance' do
context 'with a locale with interesting ancestry' do
  # es-CR has an explicit parent (es-419) in the parent locale data.
  let(:locale) { described_class.new('es', nil, 'CR') }

  describe '#ancestor_chain' do
    it 'identifies the correct ancestors' do
      ancestors = locale.ancestor_chain.map(&:dasherized)

      expect(ancestors).to eq(['es-CR', 'es-419', 'es'])
    end
  end

  describe '#max_supported' do
    it 'identifies the correct supported parent locale' do
      best_match = locale.max_supported

      expect(best_match.dasherized).to eq('es-419')
    end
  end
end

context 'with a locale instance (Korean)' do
let(:locale) { described_class.new('ko', nil, 'KR') }

describe '#full_script' do
Expand Down Expand Up @@ -212,5 +230,19 @@
)
end
end

describe '#ancestor_chain' do
  it 'identifies the correct ancestors' do
    # No explicit parent entry applies here, so the chain is just the
    # locale followed by its bare language.
    ancestors = locale.ancestor_chain.map(&:dasherized)

    expect(ancestors).to eq(['ko-KR', 'ko'])
  end
end

describe '#max_supported' do
  it 'identifies the correct supported locale' do
    best_match = locale.max_supported

    expect(best_match.dasherized).to eq('ko')
  end
end
end
end

0 comments on commit 4f3053a

Please sign in to comment.