Skip to content

Commit

Permalink
Merge branch 'master' into likely_subtags
Browse files Browse the repository at this point in the history
  • Loading branch information
camertron committed Sep 28, 2015
2 parents 4d3a824 + 31199f5 commit 3cc033e
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 83 deletions.
2 changes: 1 addition & 1 deletion Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ namespace :update do
task :unicode_scripts, :unicode_scripts_path do |_, args|
TwitterCldr::Resources::UnicodeScriptsImporter.new(
args[:unicode_scripts_path] || './vendor/unicode-data',
'./resources/unicode_data/properties/scripts.yml'
'./resources/unicode_data/properties/script.yml'
).import
end

Expand Down
4 changes: 1 addition & 3 deletions lib/twitter_cldr/resources/unicode_scripts_importer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ def initialize(input_path, output_path)
end

def import
File.open(@output_path, 'w+') do |f|
f.write(YAML.dump(parse_scripts))
end
File.write(@output_path, YAML.dump(parse_scripts))
end

protected
Expand Down
15 changes: 7 additions & 8 deletions lib/twitter_cldr/utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@
module TwitterCldr
module Utils

autoload :CodePoints, 'twitter_cldr/utils/code_points'
autoload :YAML, 'twitter_cldr/utils/yaml'
autoload :RangeHelpers, 'twitter_cldr/utils/range_helpers'
autoload :RangeSet, 'twitter_cldr/utils/range_set'
autoload :RangeHash, 'twitter_cldr/utils/range_hash'
autoload :RegexpAst, 'twitter_cldr/utils/regexp_ast'
autoload :RegexpSampler, 'twitter_cldr/utils/regexp_sampler'
autoload :ScriptDetector, 'twitter_cldr/utils/script_detector'
autoload :CodePoints, 'twitter_cldr/utils/code_points'
autoload :RangeSet, 'twitter_cldr/utils/range_set'
autoload :RegexpAst, 'twitter_cldr/utils/regexp_ast'
autoload :RegexpSampler, 'twitter_cldr/utils/regexp_sampler'
autoload :ScriptDetector, 'twitter_cldr/utils/script_detector'
autoload :ScriptDetectionResult, 'twitter_cldr/utils/script_detector'
autoload :YAML, 'twitter_cldr/utils/yaml'

class << self

Expand Down
48 changes: 0 additions & 48 deletions lib/twitter_cldr/utils/range_helpers.rb

This file was deleted.

57 changes: 48 additions & 9 deletions lib/twitter_cldr/utils/range_set.rb
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ def empty?
ranges.empty?
end

# Appends +range+ to this set's list of ranges and then calls #flatten so
# the list is re-normalized (the specs for this method show overlapping
# ranges being merged into one).
#
# range - the Range to add.
#
# Returns the receiver, so appends can be chained (set << a << b) in the
# same way as Array#<<. Previously the return value was whatever #flatten
# happened to return, which is nil on its early-return path.
def <<(range)
  ranges << range
  flatten
  self
end

# Builds a new instance of the receiver's class from the argument's ranges
# followed by the receiver's own ranges. The concatenation produces a fresh
# array, so neither operand's range list is modified here.
def union(range_set)
  combined = range_set.ranges + ranges
  self.class.new(combined)
end
Expand Down Expand Up @@ -158,7 +163,41 @@ def difference(range_set)

protected

include RangeHelpers
# Returns true when both ranges are numeric and share at least one value
# according to any of the three overlap checks (front, rear, full).
# Non-numeric ranges never overlap.
def overlap?(range1, range2)
  return false unless is_numeric_range?(range1) && is_numeric_range?(range2)

  [:front_overlap?, :rear_overlap?, :full_overlap?].any? do |check|
    send(check, range1, range2)
  end
end

# True when the end of range1 falls inside range2 (bounds inclusive).
def front_overlap?(range1, range2)
  tail = range1.last
  tail >= range2.first && tail <= range2.last
end

# True when the start of range1 falls inside range2 (bounds inclusive).
def rear_overlap?(range1, range2)
  head = range1.first
  head >= range2.first && head <= range2.last
end

# True when range1 entirely contains range2, i.e. range1 starts no later
# and ends no earlier than range2. Equal ranges count as contained.
def full_overlap?(range1, range2)
  starts_no_later = range1.first <= range2.first
  starts_no_later && range1.last >= range2.last
end

# True when range2 entirely contains range1 (the mirror image of
# full_overlap?). Equal ranges count as contained.
def fully_overlapped_by?(range1, range2)
  return false unless range2.first <= range1.first

  range1.last <= range2.last
end

# Returns true if range1 and range2 are numeric and sit directly next to
# each other with no gap between them, in either order (e.g. 1..3 and 4..6,
# or 4..6 and 1..3). Ranges that overlap or share an endpoint are not
# adjacent, and non-numeric ranges never are.
def adjacent?(range1, range2)
  return false unless is_numeric_range?(range1) && is_numeric_range?(range2)

  # Both orders are checked. The previous second operand
  # (range2.first == range1.last + 1) was just the first comparison
  # rearranged, so "range2 immediately precedes range1" was never detected.
  range1.last == range2.first - 1 || range2.last == range1.first - 1
end

# True when both endpoints of +range+ are Numeric. The check stops at the
# first endpoint that is not, so +last+ is only read when +first+ passes.
def is_numeric_range?(range)
  [:first, :last].all? { |endpoint| range.public_send(endpoint).is_a?(Numeric) }
end

def flatten
return if ranges.size <= 1
Expand Down Expand Up @@ -191,30 +230,30 @@ def flatten

def find_intersection(range1, range2)
# range2 entirely contains range1
if range2.first <= range1.first && range1.last <= range2.last
if fully_overlapped_by?(range1, range2)
range1.dup
elsif range1.last >= range2.first && range1.last <= range2.last
elsif front_overlap?(range1, range2)
range2.first..range1.last
elsif range1.first >= range2.first && range1.first <= range2.last
elsif rear_overlap?(range1, range2)
range1.first..range2.last
elsif range1.first <= range2.first && range1.last >= range2.last
elsif full_overlap?(range1, range2)
[range1.first, range2.first].max..[range1.last, range2.last].min
end
end

# subtracts range1 from range2 (range2 - range1)
def find_subtraction(range1, range2)
# case: range1 contains range2 entirely (also handles equal case)
result = if range1.first <= range2.first && range2.last <= range1.last
result = if full_overlap?(range1, range2)
[]
# case: range1 comes in the middle
elsif range2.first <= range1.first && range2.last >= range1.last
elsif fully_overlapped_by?(range1, range2)
[range2.first..(range1.first - 1), (range1.last + 1)..range2.last]
# case: range1 trails
elsif range2.last >= range1.first && range1.last >= range2.last
elsif rear_overlap?(range1, range2)
[range2.first..(range1.first - 1)]
# case: range1 leads
elsif range1.last >= range2.first && range1.first <= range2.first
elsif front_overlap?(range1, range2)
[(range1.last + 1)..range2.last]
end

Expand Down
17 changes: 4 additions & 13 deletions lib/twitter_cldr/utils/script_detector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,8 @@ def initialize(scores)
end

def best_guess
max_score = 0
max_script_name = nil

scores.each_pair do |script_name, score|
if score > max_score
max_score = score
max_script_name = script_name
end
end

max_script_name
max_score = scores.max_by { |(_, score)| score }
max_score.first if max_score
end

def score_for(script_name)
Expand All @@ -47,10 +38,10 @@ def detect_scripts(text)
)
end

protected
private

def scores_for(text)
Hash.new { |h, k| h[k] = 0 }.tap do |result|
Hash.new(0).tap do |result|
text.chars.each do |char|
script = scripts_hash[char]
result[script] += 1 if script
Expand Down
26 changes: 25 additions & 1 deletion spec/utils/range_set_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@
expect(set.to_a).to eq([1..3, 7..10])
end

it "results in an empty set if the deducted range entirely overlaps the existing ranges" do
  # 1..15 swallows 3..10 whole, so nothing should be left over.
  minuend = RangeSet.new([3..10])
  subtrahend = RangeSet.new([1..15])

  remainder = minuend.subtract(subtrahend)

  expect(remainder.to_a).to eq([])
end

it "subtracts the intersection when the range set contians multiple matching ranges" do
set = RangeSet.new([1..5, 7..10]).subtract(RangeSet.new([3..8]))
expect(set.to_a).to eq([1..2, 9..10])
Expand Down Expand Up @@ -153,7 +158,7 @@
end

describe "#include?" do
let (:set) { RangeSet.new([1..5, 9..16]) }
let(:set) { RangeSet.new([1..5, 9..16]) }

it "returns true if the set completely includes the range, false otherwise" do
expect(set).to include(10..15)
Expand All @@ -168,4 +173,23 @@
expect(set).not_to include(8)
end
end

describe '#<<' do
  let(:set) { RangeSet.new([5..10]) }

  # Appends +range+ to the set under test and returns the set's ranges.
  def ranges_after_adding(range)
    set << range
    set.to_a
  end

  it "adds a new range to the set when nothing overlaps" do
    expect(ranges_after_adding(1..3)).to eq([1..3, 5..10])
  end

  it "adds a new range to the set and handles overlapping" do
    expect(ranges_after_adding(3..6)).to eq([3..10])
  end

  it "adds a new range to the set and handles full overlapping" do
    expect(ranges_after_adding(1..15)).to eq([1..15])
  end
end
end
7 changes: 7 additions & 0 deletions spec/utils/script_detector_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,10 @@
end
end
end

describe ScriptDetectionResult do
  # A result built from an empty score table has no script to report.
  let(:empty_scores) { {} }

  it 'returns nil if no scripts were detected' do
    guess = described_class.new(empty_scores).best_guess
    expect(guess).to be_nil
  end
end

0 comments on commit 3cc033e

Please sign in to comment.