-
Notifications
You must be signed in to change notification settings - Fork 93
/
base.rb
170 lines (144 loc) · 5.19 KB
/
base.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# encoding: UTF-8
# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0
module TwitterCldr
  module Tokenizers

    # Abstract base class for CLDR tokenizers. Concrete subclasses are expected
    # to define the hooks referenced but not defined here:
    #   * init_resources — presumably populates @resource and the regex tables
    #     (token_splitter_regexes / token_type_regexes) — TODO confirm in subclasses
    #   * pattern_for    — maps a traversed resource value to a format pattern
    # A tokenizer turns a CLDR format pattern (from the YAML resource files)
    # into an array of Token / CompositeToken objects.
    class Base
      attr_reader :resource, :locale
      attr_reader :token_splitter_regexes, :token_type_regexes, :paths, :format
      attr_accessor :type, :placeholders

      # @param options [Hash] supports :locale; anything else is ignored here.
      #   The locale is normalized via TwitterCldr.convert_locale and falls
      #   back to TwitterCldr::DEFAULT_LOCALE.
      def initialize(options = {})
        @locale = TwitterCldr.convert_locale(options[:locale] || TwitterCldr::DEFAULT_LOCALE)
        init_resources   # subclass hook (not defined in this file)
        init_placeholders
      end

      protected

      # Not to be confused with tokenize_pattern, which pulls out placeholders. Tokenize_format actually splits a completely
      # expanded format string into whatever parts are defined by the subclass's token type and token splitter regexes.
      #
      # Splits `text` with the splitter regex for the current type, then
      # classifies each piece by testing it against every entry in the
      # token-type regex table and keeping the matching type with the lowest
      # :priority value. Composite pieces are recursively re-tokenized.
      #
      # NOTE(review): if no regex matches a piece, the inner inject yields an
      # empty array and `min` returns nil, producing a Token with :type => nil —
      # presumably the subclass regex tables make this unreachable; confirm.
      def tokenize_format(text)
        # A leading empty string from split (index 0, token == "") is dropped;
        # later empty strings are kept and classified like any other piece.
        text.split(token_splitter_regex_for(type)).each_with_index.inject([]) do |ret, (token, index)|
          unless index == 0 && token == ""
            regexes = token_type_regexes_for(type)
            # Collect every token type whose :regex matches this piece...
            token_type = regexes.inject([]) do |match_ret, (token_type, matchers)|
              match_ret << token_type if token =~ matchers[:regex]
              match_ret
            end.min { |a, b| regexes[a][:priority] <=> regexes[b][:priority] }  # ...then keep the lowest-priority (i.e. most specific) one
            if token_type == :composite
              # A composite token wraps the sub-tokens of its captured content
              # (capture group 1 of the :content regex).
              content = token.match(regexes[token_type][:content])[1]
              ret << CompositeToken.new(tokenize_format(content))
            else
              ret << Token.new(:value => token, :type => token_type)
            end
          end
          ret
        end
      end

      # Regex table for the given type, falling back to the :else entry.
      def token_type_regexes_for(type)
        token_type_regexes[type] || token_type_regexes[:else]
      end

      # Splitter regex for the given type, falling back to the :else entry.
      def token_splitter_regex_for(type)
        token_splitter_regexes[type] || token_splitter_regexes[:else]
      end

      # Fully-expanded, memoized token list for the pattern found at `path` in
      # the resource hash. Results are cached per (locale, path, type, format).
      #
      # NOTE(review): @@token_cache is a class variable, so the cache is shared
      # across Base and ALL its subclasses; the cache key includes locale, path,
      # type and format, which presumably prevents cross-tokenizer collisions —
      # verify that two subclasses can never share all four.
      def tokens_for(path, type)
        @@token_cache ||= {}
        cache_key = TwitterCldr::Utils.compute_cache_key(@locale, path.join('.'), type, format || "nil")
        unless @@token_cache.include?(cache_key)
          result = []
          tokens = expand_pattern(pattern_for(traverse(path)), type)
          tokens.each do |token|
            if token.is_a?(Token) || token.is_a?(CompositeToken)
              result << token  # already a finished token (came from a placeholder)
            else
              # Still a raw { :value => ..., :type => ... } hash — split its
              # text into real tokens.
              result += tokenize_format(token[:value])
            end
          end
          @@token_cache[cache_key] = result
        end
        @@token_cache[cache_key]
      end

      # Memoized placeholder/plaintext segmentation of the pattern at `key`.
      # Unlike tokens_for, the result still contains raw hashes, with
      # placeholders left unresolved.
      #
      # NOTE(review): this uses the local compute_cache_key while tokens_for
      # uses TwitterCldr::Utils.compute_cache_key — both feed the same shared
      # @@token_cache, so the two key schemes could theoretically collide;
      # consider unifying them.
      def tokens_with_placeholders_for(key)
        @@token_cache ||= {}
        cache_key = compute_cache_key(@locale, key, type)
        unless @@token_cache.include?(cache_key)
          result = []
          tokens = tokenize_pattern(pattern_for(traverse(key)))
          tokens.each do |token|
            result << token
          end
          @@token_cache[cache_key] = result
        end
        @@token_cache[cache_key]
      end

      # Joins the pieces with "|" and hashes the result; 0 for no pieces.
      def compute_cache_key(*pieces)
        if pieces && pieces.size > 0
          pieces.join("|").hash
        else
          0
        end
      end

      # Default: no placeholders. Subclasses presumably register theirs.
      def init_placeholders
        @placeholders = {}
      end

      # Looks up `path` (an array of keys) inside `hash` (default: the loaded
      # resource hash).
      def traverse(path, hash = @resource)
        TwitterCldr::Utils.traverse_hash(hash, path)
      end

      # expands all path symbols
      #
      # Recursively resolves a resource value: a Symbol is treated as a
      # dot-separated path into `haystack` and re-expanded; a Hash has each of
      # its values expanded; anything else is returned unchanged.
      def expand(current, haystack)
        if current.is_a?(Symbol)
          expand(traverse(current.to_s.split('.').map(&:to_sym), haystack), haystack)
        elsif current.is_a?(Hash)
          current.inject({}) do |ret, (key, val)|
            ret[key] = expand(val, haystack)
            ret
          end
        else
          current
        end
      end

      # Turns a format pattern into a flat list of parts, substituting each
      # placeholder part with the tokens produced by its registered placeholder
      # object (or nothing if the placeholder is unknown).
      def expand_pattern(format_str, type)
        if format_str.is_a?(Symbol)
          # symbols mean another path was given
          expand_pattern(pattern_for(traverse(format_str.to_s.split('.').map(&:to_sym))), type)
        else
          parts = tokenize_pattern(format_str)
          final = []
          parts.each do |part|
            case part[:type]
              when :placeholder
                placeholder = choose_placeholder(part[:value], @placeholders)
                final += placeholder ? placeholder.tokens(:type => type) : []
              else
                final << part
            end
          end
          final
        end
      end

      # Tokenize_pattern is supposed to take a pattern found in the YAML resource files and break it into placeholders and plaintext.
      # Placeholders are delimited by single and double curly braces, plaintext is everything else.
      #
      # The split regex uses a capture group, so the delimiters ({...}, {{...}}
      # and 'quoted' literals) are kept in the output. Pieces that start with
      # "{" become :placeholder parts; everything else (including the quoted
      # literals) is :plaintext.
      def tokenize_pattern(pattern_str)
        results = []
        pattern_str.split(/(\{\{?\w*\}?\}|\'\w+\')/).each do |piece|
          unless piece.empty?
            case piece[0].chr
              when "{"
                results << { :value => piece, :type => :placeholder }
              else
                results << { :value => piece, :type => :plaintext }
            end
          end
        end
        results
      end

      # Resolves a placeholder token to its registered object.
      #   "{{name}}" — named lookup: finds the placeholder whose :name matches
      #                the text between the double braces.
      #   "{2}"      — positional lookup: the text between the single braces is
      #                to_i'd and used as an index into `placeholders`.
      # Returns the placeholder's :object, or nil if not found.
      def choose_placeholder(token, placeholders)
        if token[0..1] == "{{"
          token_value = token[2..-3]
          found = placeholders.find { |placeholder| placeholder[:name].to_s == token_value }
        else
          found = placeholders[token[1..-2].to_i]
        end

        found ? found[:object] : nil
      end

    end
  end
end