/
time_tokenizer.rb
40 lines (36 loc) · 1.37 KB
/
time_tokenizer.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
module TwitterCldr
  module Tokenizers
    # Tokenizer for time-of-day format strings such as "h:mm:ss a".
    #
    # A format string is cut into Token objects that are either :pattern
    # tokens (runs of a recognised field letter) or :plaintext tokens
    # (everything else, including single-quoted literal text).
    #
    # The field letters recognised here (a, h, H, K, k, m, s, S, z, Z) look
    # like the time-related symbols of the Unicode LDML date field table --
    # NOTE(review): confirm against the CLDR data this class is fed.
    class TimeTokenizer < TwitterCldr::Tokenizers::DateTimeTokenizer
      # Used with String#split to cut a format string apart. Because the
      # whole expression is a capture group, split keeps the matched pieces
      # (quoted literals and field-letter runs) in its result instead of
      # discarding them as separators.
      TOKEN_SPLITTER_REGEX = /(\'[\w\s-]+\'|a{1}|h{1,2}|H{1,2}|K{1,2}|k{1,2}|m{1,2}|s{1,2}|S+|z{1,4}|Z{1,4})/

      # Ordered classification rules: the first entry whose :regex matches a
      # token decides its :type. The empty regex matches every string, so
      # :plaintext acts as the catch-all.
      #
      # The :pattern alternation is wrapped in a non-capturing group so the
      # \A anchor applies to every alternative. Without the group only the
      # first alternative is anchored, and a quoted literal such as 'hours'
      # would be misclassified as :pattern because it contains an "h".
      # Keep the field list in sync with TOKEN_SPLITTER_REGEX.
      TOKEN_TYPE_REGEXES = [{ :type => :pattern, :regex => /\A(?:a{1}|h{1,2}|H{1,2}|K{1,2}|k{1,2}|m{1,2}|s{1,2}|S+|z{1,4}|Z{1,4})/ },
                            { :type => :plaintext, :regex => // }]

      # Lookup path handed to tokens_for for each supported format type.
      PATHS = { :default => "calendars.gregorian.formats.time.default",
                :full    => "calendars.gregorian.formats.time.full",
                :long    => "calendars.gregorian.formats.time.long",
                :medium  => "calendars.gregorian.formats.time.medium",
                :short   => "calendars.gregorian.formats.time.short" }

      # Returns the tokens for the requested time format.
      #
      # options - Hash; options[:type] selects a key of PATHS and falls back
      #           to :default when missing or nil.
      #
      # Delegates to tokens_for, which is not defined in this class
      # (presumably inherited from DateTimeTokenizer). A type that is not a
      # key of PATHS results in nil being passed as the path.
      def tokens(options = {})
        type = options[:type] || :default
        tokens_for(PATHS[type], type)
      end

      protected

      # Splits +text+ with TOKEN_SPLITTER_REGEX and wraps every non-empty
      # piece in a Token whose :type comes from the first matching entry of
      # TOKEN_TYPE_REGEXES.
      #
      # text - String format to tokenize.
      #
      # Returns an Array of Token objects in the order they appear in +text+.
      def tokenize_format(text)
        final = []
        text.split(TOKEN_SPLITTER_REGEX).each do |token|
          # split yields "" when two matches are adjacent or the text starts
          # with a match; those carry no content.
          next if token.empty?

          token_type = TOKEN_TYPE_REGEXES.find { |candidate| token =~ candidate[:regex] }
          final << Token.new(:value => token, :type => token_type[:type]) if token_type
        end
        final
      end

      # Resets the placeholder table to an empty Hash.
      # NOTE(review): looks like a hook invoked by the superclass; time
      # formats define no placeholders of their own -- confirm against
      # DateTimeTokenizer.
      def init_placeholders
        @placeholders = {}
      end
    end
  end
end