Skip to content

Commit

Permalink
So close...
Browse files Browse the repository at this point in the history
  • Loading branch information
camertron committed May 1, 2017
1 parent 1b13071 commit ac78984
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 4 deletions.
22 changes: 19 additions & 3 deletions lib/twitter_cldr/parsers/unicode_regex/character_set.rb
Expand Up @@ -14,6 +14,16 @@ class CharacterSet < Component

attr_reader :property_name, :property_value

# Code points used when a character set refers to the 'Emoji' property.
#
# The raw 'Emoji' property contains more code points than are wanted here
# (stuff like #, *, and numbers), so the following are removed from it:
#   0x23        '#'
#   0x2A        '*'
#   0xA9, 0xAE  copyright and registered signs
#   0x30..0x39  ASCII digits '0' through '9'
#
# NOTE(review): this relies on `subtract` returning the resulting set
# (not nil) -- confirm against TwitterCldr::Utils::RangeSet.
EMOJI_CODE_POINTS =
CodePoint.code_points_for_property('Emoji', nil)
.subtract(
TwitterCldr::Utils::RangeSet.from_array(
[0x23, 0x2A, 0xA9, 0xAE] + (0x30..0x39).to_a
)
)

def initialize(text)
if (name_parts = text.split("=")).size == 2
@property_name, @property_value = name_parts.map(&:strip)
Expand Down Expand Up @@ -43,9 +53,15 @@ def to_s
private

def codepoints
code_points = CodePoint.code_points_for_property(
*normalized_property
)
property_name, property_value = normalized_property

code_points = if property_name.downcase == 'emoji'
EMOJI_CODE_POINTS
else
CodePoint.code_points_for_property(
property_name, property_value
)
end

if code_points.empty?
raise UnicodeRegexParserError,
Expand Down
2 changes: 1 addition & 1 deletion spec/segmentation/rule_set_spec.rb
Expand Up @@ -92,7 +92,7 @@ def error_message(test, test_case_boundaries, result_boundaries, rule_ids)
# Since there's no such thing as a partial regex match in Ruby,
# we have to ignore these cases. Hopefully they happen infrequently
# in practice.
let(:skip_cases) { ['÷ 05D0 × 0027 ÷', '÷ 05D0 × 0308 × 0027 ÷'] }
let(:skip_cases) { [] } # { ['÷ 05D0 × 0027 ÷', '÷ 05D0 × 0308 × 0027 ÷'] }

it_behaves_like 'a conformant implementation'
end
Expand Down

0 comments on commit ac78984

Please sign in to comment.