/
base.rb
125 lines (108 loc) · 3.78 KB
/
base.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# encoding: UTF-8
# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0
module TwitterCldr
  module Tokenizers

    # Shared plumbing for the concrete tokenizers. A subclass is expected to
    # provide the resource data (via an +init_resources+ hook), a +pattern_for+
    # method, and the regexes (@token_splitter_regex / @token_type_regexes)
    # that drive tokenize_format. This class turns resource patterns into flat
    # lists of Token / CompositeToken objects.
    class Base
      attr_reader :resource, :locale
      attr_reader :token_splitter_regex, :token_type_regexes, :paths
      attr_accessor :type, :placeholders

      # options[:locale] - the locale to tokenize for; defaults to
      # TwitterCldr::DEFAULT_LOCALE and is normalized to a symbol.
      def initialize(options = {})
        @locale = (options[:locale] || TwitterCldr::DEFAULT_LOCALE).to_sym
        init_resources    # subclass hook: loads @resource and friends
        init_placeholders
      end

      protected

      # Not to be confused with tokenize_pattern, which pulls out placeholders.
      # tokenize_format splits a completely expanded format string into the
      # parts defined by the subclass's token type and token splitter regexes.
      def tokenize_format(text)
        chunks = text.split(token_splitter_regex)
        tokens = []

        chunks.each_with_index do |chunk, idx|
          # String#split keeps a leading empty string when the pattern has a
          # capture group and matches at position 0 - drop it.
          next if idx == 0 && chunk == ""

          # First matching type wins, mirroring the declaration order.
          spec = token_type_regexes.find { |candidate| chunk =~ candidate[:regex] }
          next unless spec

          tokens << if spec[:type] == :composite
            # Composite tokens recurse on the captured inner content.
            inner = chunk.match(spec[:content])[1]
            CompositeToken.new(tokenize_format(inner))
          else
            Token.new(:value => chunk, :type => spec[:type])
          end
        end

        tokens
      end

      # Looks up the pattern at +key+, expands its placeholders for +type+,
      # and flattens everything into a single list of token objects.
      def tokens_for(key, type)
        expanded = expand_pattern(pattern_for(traverse(key)), type)

        expanded.inject([]) do |result, token|
          case token
          when Token, CompositeToken then result << token
          else result.concat(tokenize_format(token[:value]))
          end
        end
      end

      # Subclasses may override this to seed @placeholders.
      def init_placeholders
        @placeholders = {}
      end

      # Walks a dot-separated path (e.g. "calendars.gregorian") down the given
      # hash. Returns the value found, or nil as soon as a segment is missing
      # or the current node is not a hash.
      def traverse(needle, haystack = @resource)
        needle.to_s.split('.').inject(haystack) do |node, segment|
          step = segment.to_sym
          return nil unless node.is_a?(Hash) && node.has_key?(step)
          node[step]
        end
      end

      # Expands a resource pattern into a mixed list of placeholder token
      # objects and plaintext parts. A Symbol is an indirection: it names
      # another resource path whose pattern should be expanded instead.
      def expand_pattern(format_str, type)
        if format_str.is_a?(Symbol)
          return expand_pattern(pattern_for(traverse(format_str)), type)
        end

        tokenize_pattern(format_str).inject([]) do |expanded, part|
          if part[:type] == :placeholder
            placeholder = choose_placeholder(part[:value], @placeholders)
            expanded.concat(placeholder ? placeholder.tokens(:type => type) : [])
          else
            expanded << part
          end
        end
      end

      # tokenize_pattern takes a pattern found in the YAML resource files and
      # breaks it into placeholders and plaintext. Placeholders are delimited
      # by single and double curly braces; plaintext is everything else.
      def tokenize_pattern(pattern_str)
        pattern_str.split(/(\{\{?\w*\}?\}|\'\w+\')/).inject([]) do |results, piece|
          unless piece.empty?
            kind = piece.start_with?("{") ? :placeholder : :plaintext
            results << { :value => piece, :type => kind }
          end
          results
        end
      end

      # Resolves one placeholder token against the current placeholder list:
      # "{{name}}" is looked up by name, "{0}"-style tokens index into the
      # list. Returns the matching entry's :object, or nil when none matches.
      def choose_placeholder(token, placeholders)
        if token.start_with?("{{")
          name = token[2..-3]
          entry = placeholders.find { |candidate| candidate[:name].to_s == name }
        else
          entry = placeholders[token[1..-2].to_i]
        end
        entry ? entry[:object] : nil
      end
    end
  end
end