Skip to content
Browse files

Fixing tokenization bug causing single quotes to not be removed

  • Loading branch information...
1 parent 475ffcf commit 371869c050c256abab7d7eb58d12495a1abcda04 Cameron Dutro committed Feb 4, 2013
View
2 lib/twitter_cldr/tokenizers/base.rb
@@ -34,7 +34,7 @@ def tokenize_format(text)
content = token.match(regexes[token_type][:content])[1]
ret << CompositeToken.new(tokenize_format(content))
else
- ret << Token.new(:value => token, :type => token_type) # .gsub(/\A\'/, "").chomp("'")
+ ret << Token.new(:value => token, :type => token_type)
end
end
ret
View
2 lib/twitter_cldr/tokenizers/calendars/datetime_tokenizer.rb
@@ -20,7 +20,7 @@ def initialize(options = {})
DateTokenizer::TOKEN_SPLITTER_REGEX,
TimeTokenizer::TOKEN_SPLITTER_REGEX
),
- :else => //
+ :else => /([^\s]+)/ # groups of non-space chars
}
@token_type_regexes = {

0 comments on commit 371869c

Please sign in to comment.
Something went wrong with that request. Please try again.