Skip to content

Commit

Permalink
Import property value aliases
Browse files Browse the repository at this point in the history
  • Loading branch information
camertron committed Sep 26, 2015
1 parent 9185a80 commit 1919a6a
Show file tree
Hide file tree
Showing 4 changed files with 2,201 additions and 22 deletions.
18 changes: 18 additions & 0 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ task :update do
"update:locales_resources", # per locale (+ units resources using different CLDR and ruby-cldr, see LocalesResourcesImporter)
"update:unicode_data",
"update:unicode_properties",
"update:unicode_scripts",
"update:unicode_property_value_aliases",
"update:generate_casefolder", # must come after unicode data
"update:composition_exclusions",
"update:postal_codes",
Expand Down Expand Up @@ -126,6 +128,22 @@ namespace :update do
).import
end

# Rake task that runs the Unicode scripts importer. The input directory may be
# supplied as the :unicode_scripts_path task argument; when it is omitted the
# vendored Unicode data directory is used. Output always goes to scripts.yml.
desc 'Import Unicode script resources'
task :unicode_scripts, :unicode_scripts_path do |_, args|
  source_dir = args[:unicode_scripts_path] || './vendor/unicode-data'
  target_file = './resources/unicode_data/scripts.yml'

  importer = TwitterCldr::Resources::UnicodeScriptsImporter.new(source_dir, target_file)
  importer.import
end

# Rake task that runs the property value aliases importer. The input directory
# may be supplied as the :property_value_aliases_path task argument; when it is
# omitted the vendored Unicode data directory is used. Output always goes to
# property_value_aliases.yml.
desc 'Import unicode property value aliases'
task :unicode_property_value_aliases, :property_value_aliases_path do |_, args|
  source_dir = args[:property_value_aliases_path] || './vendor/unicode-data'
  target_file = './resources/unicode_data/property_value_aliases.yml'

  importer = TwitterCldr::Resources::UnicodePropertyValueAliasesImporter.new(source_dir, target_file)
  importer.import
end

desc 'Generate the casefolder class. Depends on unicode data'
task :generate_casefolder do
TwitterCldr::Resources::CasefolderClassGenerator.new(
Expand Down
46 changes: 24 additions & 22 deletions lib/twitter_cldr/resources.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,29 @@

# Namespace for the resource importers, generators and helpers. Every constant
# below is registered with `autoload`, so its file is only required the first
# time the constant is referenced.
module TwitterCldr
module Resources
# NOTE(review): this first run of autoloads is repeated verbatim in the second
# run below; it looks like the pre-change side of the diff — confirm before
# removing either run.
autoload :Uli, 'twitter_cldr/resources/uli'
autoload :UnicodeImporter, 'twitter_cldr/resources/unicode_importer'
autoload :IcuBasedImporter, 'twitter_cldr/resources/icu_based_importer'
autoload :CanonicalCompositionsUpdater, 'twitter_cldr/resources/canonical_compositions_updater'
autoload :CollationTriesDumper, 'twitter_cldr/resources/collation_tries_dumper'
autoload :CompositionExclusionsImporter, 'twitter_cldr/resources/composition_exclusions_importer'
autoload :CurrenciesImporter, 'twitter_cldr/resources/currencies_importer'
autoload :CustomLocalesResourcesImporter, 'twitter_cldr/resources/custom_locales_resources_importer'
autoload :LanguageCodesImporter, 'twitter_cldr/resources/language_codes_importer'
autoload :Loader, 'twitter_cldr/resources/loader'
autoload :LocalesResourcesImporter, 'twitter_cldr/resources/locales_resources_importer'
autoload :PhoneCodesImporter, 'twitter_cldr/resources/phone_codes_importer'
autoload :PostalCodesImporter, 'twitter_cldr/resources/postal_codes_importer'
autoload :TailoringImporter, 'twitter_cldr/resources/tailoring_importer'
autoload :UnicodeDataImporter, 'twitter_cldr/resources/unicode_data_importer'
autoload :UnicodePropertiesImporter, 'twitter_cldr/resources/unicode_properties_importer'
autoload :BidiTestImporter, 'twitter_cldr/resources/bidi_test_importer'
autoload :NormalizationQuickCheckImporter, 'twitter_cldr/resources/normalization_quick_check_importer'
autoload :RbnfTestImporter, 'twitter_cldr/resources/rbnf_test_importer'
autoload :ReadmeRenderer, 'twitter_cldr/resources/readme_renderer'
autoload :CasefolderClassGenerator, 'twitter_cldr/resources/casefolder_class_generator'
autoload :RegexpAstGenerator, 'twitter_cldr/resources/regexp_ast_generator'
# Second run: the same constants as above plus the two importers added for
# Unicode scripts and property value aliases.
autoload :Uli, 'twitter_cldr/resources/uli'
autoload :UnicodeImporter, 'twitter_cldr/resources/unicode_importer'
autoload :IcuBasedImporter, 'twitter_cldr/resources/icu_based_importer'
autoload :CanonicalCompositionsUpdater, 'twitter_cldr/resources/canonical_compositions_updater'
autoload :CollationTriesDumper, 'twitter_cldr/resources/collation_tries_dumper'
autoload :CompositionExclusionsImporter, 'twitter_cldr/resources/composition_exclusions_importer'
autoload :CurrenciesImporter, 'twitter_cldr/resources/currencies_importer'
autoload :CustomLocalesResourcesImporter, 'twitter_cldr/resources/custom_locales_resources_importer'
autoload :LanguageCodesImporter, 'twitter_cldr/resources/language_codes_importer'
autoload :Loader, 'twitter_cldr/resources/loader'
autoload :LocalesResourcesImporter, 'twitter_cldr/resources/locales_resources_importer'
autoload :PhoneCodesImporter, 'twitter_cldr/resources/phone_codes_importer'
autoload :PostalCodesImporter, 'twitter_cldr/resources/postal_codes_importer'
autoload :TailoringImporter, 'twitter_cldr/resources/tailoring_importer'
autoload :UnicodeDataImporter, 'twitter_cldr/resources/unicode_data_importer'
autoload :UnicodePropertiesImporter, 'twitter_cldr/resources/unicode_properties_importer'
autoload :UnicodeScriptsImporter, 'twitter_cldr/resources/unicode_scripts_importer'
autoload :UnicodePropertyValueAliasesImporter, 'twitter_cldr/resources/unicode_property_value_aliases_importer'
autoload :BidiTestImporter, 'twitter_cldr/resources/bidi_test_importer'
autoload :NormalizationQuickCheckImporter, 'twitter_cldr/resources/normalization_quick_check_importer'
autoload :RbnfTestImporter, 'twitter_cldr/resources/rbnf_test_importer'
autoload :ReadmeRenderer, 'twitter_cldr/resources/readme_renderer'
autoload :CasefolderClassGenerator, 'twitter_cldr/resources/casefolder_class_generator'
autoload :RegexpAstGenerator, 'twitter_cldr/resources/regexp_ast_generator'
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

require 'twitter_cldr/resources/download'

module TwitterCldr
  module Resources

    # Reads Unicode's PropertyValueAliases.txt and dumps its contents, grouped
    # by property, to a single YAML file.
    #
    # The resulting structure is a hash keyed by property name (as a symbol)
    # whose values are arrays of alias hashes, e.g.
    #
    #   { sc: [{ abbreviated_name: 'Latn', long_name: 'Latin' }, ...] }
    #
    # File parsing is delegated to UnicodeImporter#parse_standard_file, which
    # is defined outside this file.
    class UnicodePropertyValueAliasesImporter < UnicodeImporter
      # Name of the data file expected inside the input directory.
      PROPERTY_VALUE_ALIASES_FILE = 'PropertyValueAliases.txt'

      # Remote location of the data file.
      PROPERTY_VALUE_ALIASES_URL = 'ftp://ftp.unicode.org/Public/UNIDATA/PropertyValueAliases.txt'

      # Backwards-compatible alias. The original constant name was carried over
      # from the scripts importer even though it points at
      # PropertyValueAliases.txt; prefer PROPERTY_VALUE_ALIASES_URL.
      SCRIPTS_URL = PROPERTY_VALUE_ALIASES_URL

      # Arguments:
      #
      #   input_path  - path to a directory containing PropertyValueAliases.txt
      #   output_path - path of the YAML file to write (opened directly as a
      #                 file by #import, so it must not be a directory)
      #
      def initialize(input_path, output_path)
        @input_path = input_path
        @output_path = output_path
      end

      # Parses the aliases file and writes the result as YAML to output_path,
      # replacing any existing content.
      def import
        File.open(@output_path, 'w+') do |f|
          f.write(YAML.dump(parse_property_value_aliases))
        end
      end

      protected

      # Builds the property => [alias, ...] hash.
      #
      # Each yielded `data` is treated as an array of fields whose first
      # element is the property name. Rows for the 'ccc' property have an
      # extra leading numeric field and are handled by parse_ccc_alias.
      def parse_property_value_aliases
        # The block form gives every property its own array on first access.
        Hash.new { |h, k| h[k] = [] }.tap do |result|
          parse_standard_file(property_value_aliases_file) do |data|
            property = data[0]
            entry = property == 'ccc' ? parse_ccc_alias(data) : parse_alias(data)
            result[property.to_sym] << entry
          end
        end
      end

      # Standard row layout: property ; abbreviated name ; long name.
      # NOTE(review): any further fields on the row are ignored here — confirm
      # that additional aliases are not needed downstream.
      def parse_alias(data)
        {
          abbreviated_name: data[1],
          long_name: data[2]
        }
      end

      # 'ccc' row layout: property ; numeric ; abbreviated name ; long name.
      # Per the PropertyValueAliases.txt format notes, the numeric field is the
      # Canonical_Combining_Class value the names stand for. It is stored as
      # read, without conversion.
      def parse_ccc_alias(data)
        {
          numeric: data[1],
          abbreviated_name: data[2],
          long_name: data[3]
        }
      end

      # Resolves the local data file via TwitterCldr::Resources.download_if_necessary
      # and passes its result on to parse_standard_file.
      # NOTE(review): presumably this downloads the file from
      # PROPERTY_VALUE_ALIASES_URL when it is missing locally and returns the
      # local path — confirm against the download helper.
      def property_value_aliases_file
        TwitterCldr::Resources.download_if_necessary(
          File.join(@input_path, PROPERTY_VALUE_ALIASES_FILE), PROPERTY_VALUE_ALIASES_URL
        )
      end
    end

  end
end
Loading

0 comments on commit 1919a6a

Please sign in to comment.