Skip to content

Commit

Permalink
Ever closer...
Browse files Browse the repository at this point in the history
  • Loading branch information
camertron committed Nov 17, 2019
1 parent 3e9f2b1 commit a17af81
Show file tree
Hide file tree
Showing 13 changed files with 731 additions and 108 deletions.
5 changes: 5 additions & 0 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,11 @@ namespace :update do
TwitterCldr::Resources::TimezonesImporter.new.import
end

# Rake task that instantiates TimezoneRegionsImporter and runs its import.
desc 'Import timezone region data'
task :timezone_regions do
TwitterCldr::Resources::TimezoneRegionsImporter.new.import
end

desc 'Import timezone data'
task :bcp47_timezone_aliases do
TwitterCldr::Resources::Bcp47TimezoneAliasesImporter.new.import
Expand Down
2 changes: 2 additions & 0 deletions lib/twitter_cldr/resources.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ module Resources
autoload :SegmentTestsImporter, 'twitter_cldr/resources/segment_tests_importer'
autoload :TailoringImporter, 'twitter_cldr/resources/tailoring_importer'
autoload :TimezonesImporter, 'twitter_cldr/resources/timezones_importer'
autoload :TimezoneRegionsImporter, 'twitter_cldr/resources/timezone_regions_importer'
autoload :TimezoneTestsImporter, 'twitter_cldr/resources/timezone_tests_importer'
autoload :TransformTestsImporter, 'twitter_cldr/resources/transform_tests_importer'
autoload :TransformsImporter, 'twitter_cldr/resources/transforms_importer'
Expand Down Expand Up @@ -59,6 +60,7 @@ def standard_importer_classes
SegmentTestsImporter,
TailoringImporter,
TimezonesImporter,
TimezoneRegionsImporter,
TimezoneTestsImporter,
TransformTestsImporter,
TransformsImporter,
Expand Down
40 changes: 40 additions & 0 deletions lib/twitter_cldr/resources/timezone_regions_importer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

require 'fileutils'
require 'tzinfo'

module TwitterCldr
module Resources

# Dumps a YAML map of TZInfo timezone identifier => canonical country, as
# reported by ICU's ZoneMeta, to timezone_regions.yml under the importer's
# output path.
#
# This class should be used with JRuby in 1.9 mode
class TimezoneRegionsImporter < Importer
  requirement :icu, Versions.icu_version
  output_path 'shared'
  ruby_engine :jruby

  # Creates the output directory if needed, then writes the region map
  # into it as timezone_regions.yml.
  def execute
    destination_dir = params.fetch(:output_path)
    FileUtils.mkdir_p(destination_dir)

    destination = File.join(destination_dir, 'timezone_regions.yml')
    File.write(destination, YAML.dump(regions))
  end

  private

  # Returns a hash of timezone id (symbol) => canonical country for every
  # identifier TZInfo knows about. Identifiers for which ICU returns a
  # falsy value are left out.
  def regions
    mapping = {}

    TZInfo::Timezone.all_identifiers.each do |tz_id|
      country = zone_meta.getCanonicalCountry(tz_id)
      mapping[tz_id.to_sym] = country if country
    end

    mapping
  end

  # The com.ibm.icu.impl.ZoneMeta class obtained through the :icu
  # requirement; looked up once and memoized.
  def zone_meta
    @zone_meta ||= requirements[:icu].get_class('com.ibm.icu.impl.ZoneMeta')
  end
end

end
end
4 changes: 2 additions & 2 deletions lib/twitter_cldr/resources/timezone_tests_importer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ module Resources
class TimezoneTestsImporter < Importer
requirement :icu, Versions.icu_version
output_path File.join(TwitterCldr::SPEC_DIR, 'timezones', 'tests')
locales TwitterCldr.supported_locales
ruby_engine :jruby

TYPE_MAP = {
Expand All @@ -27,13 +28,12 @@ class TimezoneTestsImporter < Importer
}

def execute
binding.pry
check_tzdata_versions

output_path = params.fetch(:output_path)
FileUtils.mkdir_p(output_path)

TwitterCldr.supported_locales.each do |locale|
params[:locales].each do |locale|
output_file = File.join(output_path, "#{locale}.yml")

File.write(
Expand Down
5 changes: 3 additions & 2 deletions lib/twitter_cldr/timezones/generic_location.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ def display_name_for(date, fmt = :location)
private

def generic_location_display_name
if region_code = ZoneMeta.canonical_country_for(tz_id)
if ZoneMeta.is_primary_region?(region_code)
if region_code = ZoneMeta.canonical_country_for(tz.identifier)
if ZoneMeta.is_primary_region?(region_code, tz_id)
region_name = Territories.from_territory_code_for_locale(region_code, tz.locale)
return region_formats[:generic].sub('{0}', region_name || region_code)
else
Expand Down Expand Up @@ -88,6 +88,7 @@ def generic_display_name(date, fmt)
golden_period = golden_zone.period_for_local(golden_date)

if period.base_utc_offset != golden_period.base_utc_offset || period.std_offset != golden_period.std_offset
return nil unless mz_name
return partial_location_name_for(tz_metazone.metazone, mz_name)
else
return mz_name
Expand Down
79 changes: 49 additions & 30 deletions lib/twitter_cldr/timezones/gmt_location.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,49 +8,55 @@ module Timezones
class GmtLocation < Location
FORMATS = [:long, :short].freeze
DEFAULT_FORMAT = :short
DEFAULT_GMT_ZERO_FORMAT = 'GMT'.freeze

# Formats the timezone's UTC offset at +date+ as a localized GMT string.
#
# The offset is the sum of the period's base_utc_offset and std_offset
# (both in seconds). A zero offset yields gmt_zero_format; any other
# offset is rendered by format_offset and substituted for the '{0}'
# placeholder in gmt_format.
#
# date   - local time handed to tz.period_for_local
# format - offset style passed through to format_offset
#          (defaults to DEFAULT_FORMAT)
#
# Returns a String.
def display_name_for(date, format = DEFAULT_FORMAT)
  offset = tz.period_for_local(date).offset
  offset_secs = offset.base_utc_offset + offset.std_offset
  return gmt_zero_format if offset_secs == 0

  gmt_format.sub('{0}', format_offset(offset_secs, format))
end

private

def sign_for(number)
number.positive? || number.zero? ? :positive : :negative
# Splits a UTC offset given in seconds into hour, minute and second parts
# and renders them with the hour format selected by the offset's sign.
#
# offset_secs - signed offset in seconds; zero is treated as positive
# format      - passed through unchanged to format_tokens
def format_offset(offset_secs, format)
  sign = offset_secs >= 0 ? :positive : :negative
  magnitude = offset_secs.abs

  whole_minutes = magnitude / 60
  hours = whole_minutes / 60
  minutes = whole_minutes % 60
  seconds = magnitude % 60

  format_tokens(hour_format(sign), format, hours, minutes, seconds)
end

def numbering_system
@numbering_system ||= TwitterCldr::Shared::NumberingSystem.for_locale(locale)
# Renders a tokenized hour pattern into a String.
#
# Plaintext tokens are copied verbatim. Pattern tokens are dispatched on
# their first character: 'H' emits the hour, 'm' the minutes and 's' the
# seconds, each via offset_digits. Hours use a minimum of one digit for the
# :short format and two otherwise; minutes and seconds always use two.
# Any other token type or pattern letter contributes nothing.
def format_tokens(tokens, format, hour, min, sec)
  tokens.each_with_object('') do |token, buffer|
    kind = token.type

    if kind == :plaintext
      buffer << token.value
    elsif kind == :pattern
      case token.value[0]
      when 'H' then buffer << offset_digits(hour, format == :short ? 1 : 2)
      when 'm' then buffer << offset_digits(min, 2)
      when 's' then buffer << offset_digits(sec, 2)
      end
    end
  end
end

def gmt_format
resource[:formats][:gmt_format]
# Left-pads +n+ with zeros to at least +min_digits+ characters and passes
# the result through number_system.transliterate.
def offset_digits(n, min_digits)
  system = number_system
  system.transliterate(n.to_s.rjust(min_digits, '0'))
end

def gmt_zero_format
resource[:formats][:gmt_zero_format]
# Numbering system for the timezone's locale, resolved through
# NumberingSystem.for_locale on first use and cached on the instance.
def number_system
  @number_system ||= begin
    locale = tz.locale
    TwitterCldr::Shared::NumberingSystem.for_locale(locale)
  end
end

def hour_format(type)
Expand All @@ -63,7 +69,20 @@ def hour_format(type)
end

# Returns the tokenized hour format patterns for this resource.
#
# The generic hour formats entry is a ';'-separated list of patterns; each
# one is tokenized with TimeTokenizer's class-level tokenizer. The list of
# token arrays is memoized on the instance.
def hour_formats
  @hour_formats ||= resource[:formats][:hour_formats][:generic]
    .split(';')
    .map { |pattern| TwitterCldr::Tokenizers::TimeTokenizer.tokenizer.tokenize(pattern) }
end

# Text used for a zero offset: the resource's generic GMT-zero format, or
# DEFAULT_GMT_ZERO_FORMAT when the resource has none. Memoized.
def gmt_zero_format
  @gmt_zero_format ||= begin
    localized = resource[:formats][:gmt_zero_formats][:generic]
    localized || DEFAULT_GMT_ZERO_FORMAT
  end
end

# The resource's generic GMT format pattern, memoized on the instance.
def gmt_format
  @gmt_format ||= begin
    formats = resource[:formats]
    formats[:gmt_formats][:generic]
  end
end
end
end
Expand Down
3 changes: 2 additions & 1 deletion lib/twitter_cldr/timezones/timezone.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@ class Timezone
short_generic: :short,
}

attr_reader :orig_tz, :tz, :locale
attr_reader :orig_tz, :canon_tz, :tz, :locale

# tz_id  - timezone identifier; resolved as given for orig_tz, and after
#          ZoneMeta.normalize for tz
# locale - defaults to TwitterCldr.locale
def initialize(tz_id, locale = TwitterCldr.locale)
  @orig_tz  = TZInfo::Timezone.get(tz_id)
  @canon_tz = @orig_tz.canonical_zone

  zone_class = TZInfo::Timezone
  @tz     = zone_class.get(ZoneMeta.normalize(tz_id))
  @locale = locale
end
Expand Down
40 changes: 8 additions & 32 deletions lib/twitter_cldr/timezones/zone_meta.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,7 @@ class ZoneMeta
class << self
# Normalizes a timezone identifier.
#
# The id is converted to a String and stripped, then looked up (as a
# symbol) in the BCP47 alias table. Returns the alias entry when there is
# one, otherwise the stripped id itself.
def normalize(tz_id)
  tz_id = tz_id.to_s.strip
  bcp47_aliases[tz_id.to_sym] || tz_id
end

def canonical_country_for(tz_id)
Expand All @@ -89,11 +81,11 @@ def canonical_country_for(tz_id)
end

# Looks up +tz_id+ (converted to a symbol) in the shared timezone regions
# resource and returns whatever the resource holds for that key.
def region_for_tz(tz_id)
  regions_resource[tz_id.to_sym]
end

def is_primary_region?(region_code)
primary_zones.include?(region_code) ||
# True when +tz_id+ is the zone listed for +region_code+ in primary_zones,
# or when TZInfo lists at most one zone identifier for that country.
def is_primary_region?(region_code, tz_id)
  return true if primary_zones[region_code.to_sym] == tz_id

  zone_count = TZInfo::Country.get(region_code).zone_identifiers.size
  zone_count <= 1
end

Expand All @@ -107,26 +99,6 @@ def tz_metazone_for(tz_id, date)

private

# Memoized hash of zone identifier => country code, built by walking every
# country code TZInfo knows and recording each of its zone identifiers.
def zone_country_code_map
  @zone_country_code_map ||= begin
    by_zone = {}

    TZInfo::Country.all_codes.each do |country_code|
      zone_ids = TZInfo::Country.get(country_code).zone_identifiers
      # should only be one country code per zone (empirically true although
      # maybe not theoretically true); if a zone repeats, the last country
      # visited wins
      zone_ids.each { |zone_id| by_zone[zone_id] = country_code }
    end

    by_zone
  end
end

# Memoized hash of zone identifier => region code, built by walking every
# country code TZInfo knows and recording each of its zone identifiers.
def region_map
  @region_map ||= begin
    by_zone = {}

    TZInfo::Country.all_codes.each do |region_code|
      zone_ids = TZInfo::Country.get(region_code).zone_identifiers
      # should only be one country code per zone (empirically true although
      # maybe not theoretically true); if a zone repeats, the last region
      # visited wins
      zone_ids.each { |zone_id| by_zone[zone_id] = region_code }
    end

    by_zone
  end
end

def aliases
@aliases ||= aliases_resource[:zone].each_with_object({}) do |(_, zones), ret|
ret.merge!(zones)
Expand Down Expand Up @@ -156,6 +128,10 @@ def metazones_resource
# The :aliases section of the shared aliases resource, loaded once through
# TwitterCldr.get_resource and memoized.
def aliases_resource
  @aliases_resource ||= begin
    shared_aliases = TwitterCldr.get_resource(:shared, :aliases)
    shared_aliases[:aliases]
  end
end

# The shared timezone_regions resource, loaded once through
# TwitterCldr.get_resource and memoized.
def regions_resource
  @regions_resource ||= begin
    resource_path = [:shared, :timezone_regions]
    TwitterCldr.get_resource(*resource_path)
  end
end
end
end
end
Expand Down
64 changes: 36 additions & 28 deletions lib/twitter_cldr/tokenizers/calendars/date_time_tokenizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,57 @@ module TwitterCldr
module Tokenizers
class DateTimeTokenizer

class << self
  # Tokenizer for combined date-time patterns: recognizes the {{date}} and
  # {{time}} placeholders, quoted text, and everything else as plaintext.
  # Built once and cached on the class.
  def tokenizer
    @tokenizer ||= Tokenizer.new([
      TokenRecognizer.new(:date, /\{\{date\}\}/),
      TokenRecognizer.new(:time, /\{\{time\}\}/),
      TokenRecognizer.new(:plaintext, /'.*'/),
      TokenRecognizer.new(:plaintext, //)
    ])
  end

  # Tokenizer for mixed date and time patterns: the union of the tokenizers
  # behind data_reader's date and time readers (Tokenizer.union is given a
  # block that is true for non-plaintext recognizers), followed by a
  # catch-all plaintext recognizer.
  #
  # data_reader - reader used to reach the date/time tokenizers. Required
  #               the first time the tokenizer is built, because no data
  #               reader is available at class level; ignored once the
  #               result is cached.
  #
  # The result is cached on the class, so the first data_reader supplied
  # determines the tokenizer every later caller receives.
  #
  # Raises ArgumentError when nothing is cached yet and no data_reader is
  # given.
  def full_tokenizer(data_reader = nil)
    @full_tokenizer ||= begin
      unless data_reader
        raise ArgumentError, 'a data reader is required to build the full tokenizer'
      end

      new_tok = Tokenizer.union(
        data_reader.date_reader.tokenizer.tokenizer,
        data_reader.time_reader.tokenizer.tokenizer
      ) do |recognizer|
        recognizer.token_type != :plaintext
      end

      new_tok.recognizers << TokenRecognizer.new(:plaintext, //)
      new_tok
    end
  end
end

attr_reader :data_reader

def initialize(data_reader)
  @data_reader = data_reader
end

# Tokenizes a combined date-time pattern and passes the tokens through
# expand_tokens.
def tokenize(pattern)
  expand_tokens(tokenizer.tokenize(pattern))
end

# Tokenizes mixed date and time pattern strings,
# used to tokenize the additional date format patterns.
def full_tokenize(pattern)
  full_tokenizer.tokenize(pattern)
end

protected

# Class-level tokenizer wrapped in a PatternTokenizer bound to this
# instance's data_reader; memoized per instance.
def tokenizer
  @tokenizer ||= PatternTokenizer.new(data_reader, self.class.tokenizer)
end

# Class-level full tokenizer wrapped in a PatternTokenizer bound to this
# instance's data_reader; memoized per instance. The data reader is passed
# along so the class-level tokenizer can be built on first use.
def full_tokenizer
  @full_tokenizer ||= PatternTokenizer.new(
    data_reader, self.class.full_tokenizer(data_reader)
  )
end

def expand_tokens(tokens)
tokens.inject([]) do |ret, token|
ret + case token.type
Expand All @@ -50,29 +81,6 @@ def expand_time(token)
time_reader.tokenizer.tokenize(time_reader.pattern)
end

# Builds a tokenizer for mixed date/time patterns: Tokenizer.union over the
# tokenizers behind data_reader's date and time readers (with a block that
# is true for non-plaintext recognizers), then appends a plaintext
# recognizer with an empty pattern.
#
# NOTE(review): the result is cached in a class variable, so whichever
# data_reader reaches this method first determines the tokenizer shared by
# all instances. The visible code also defines a class-level full_tokenizer
# with the same construction -- confirm which definition is meant to be
# authoritative.
def full_tokenizer
@@full_tokenizer ||= begin
new_tok = Tokenizer.union(
data_reader.date_reader.tokenizer.tokenizer,
data_reader.time_reader.tokenizer.tokenizer
) do |recognizer|
recognizer.token_type != :plaintext
end

new_tok.recognizers << TokenRecognizer.new(:plaintext, //)
new_tok
end
end

# Tokenizer for combined date-time patterns: recognizes the {{date}} and
# {{time}} placeholders, quoted text, and everything else as plaintext.
# Memoized on the instance.
def tokenizer
  @tokenizer ||= begin
    recognizers = [
      TokenRecognizer.new(:date, /\{\{date\}\}/),
      TokenRecognizer.new(:time, /\{\{time\}\}/),
      TokenRecognizer.new(:plaintext, /'.*'/),
      TokenRecognizer.new(:plaintext, //)
    ]

    Tokenizer.new(recognizers)
  end
end

end
end
end
end
Loading

0 comments on commit a17af81

Please sign in to comment.