Permalink
Cannot retrieve contributors at this time
Join GitHub today
GitHub is home to over 28 million developers working together to host and review code, manage projects, and build software together.
Sign up
Fetching contributors…
| # The Tokenizer module adds methods to the query language compiler that transform a query string | |
| # into a stream of tokens, which is easier to parse than the raw query string. | |
| module ScopedSearch::QueryLanguage::Tokenizer | |
# Reserved words of the query language, mapped to the token symbol that is
# emitted for each of them. Lookups are done with the downcased keyword.
# The table is frozen so it cannot be modified by accident at runtime.
KEYWORDS = {
  'and'    => :and,
  'or'     => :or,
  'not'    => :not,
  'set?'   => :notnull,
  'has'    => :notnull,
  'null?'  => :null,
  'before' => :lt,
  'after'  => :gt,
  'at'     => :eq
}.freeze
# Every operator the language supports, mapped to the token symbol that is
# emitted for it. Operators are one or two characters long; several spellings
# may map to the same symbol. The table is frozen so it cannot be modified by
# accident at runtime.
OPERATORS = {
  '&'  => :and,    '|'  => :or,
  '&&' => :and,    '||' => :or,
  '-'  => :not,    '!'  => :not,
  '~'  => :like,   '!~' => :unlike,
  '='  => :eq,     '==' => :eq,
  '!=' => :ne,     '<>' => :ne,
  '>'  => :gt,     '<'  => :lt,
  '>=' => :gte,    '<=' => :lte,
  '^'  => :in,     '!^' => :notin
}.freeze
# Runs the tokenizer from the start of the string and returns whatever +to_a+
# returns. +to_a+ is not defined in this module; presumably the including
# class mixes in Enumerable on top of +each+ -- verify against the includer.
def tokenize
  # next_char increments the pointer before reading, so rewind to one
  # position before the first character.
  rewound = -1
  @current_char_pos = rewound
  to_a
end
# Reader for the character most recently stored by next_char. Does not move
# the position pointer.
def current_char
  @current_char
end
# Returns a following character of the string (by default, the next
# character) without updating the position pointer.
#
# amount - Offset from the current position (default: 1).
#
# Returns a one-character string, or whatever @str[index, 1] returns when the
# index is at or past the end of the string. Returns nil when the computed
# index is negative: a negative index would otherwise wrap around and return
# a character counted from the end of the string.
def peek_char(amount = 1)
  index = @current_char_pos + amount
  return nil if index < 0

  @str[index, 1]
end
# Advances the position pointer by one, stores the character found there as
# the current character, and returns it. The return value is whatever
# @str[position, 1] gives, so it is nil once the position is out of range.
def next_char
  position = @current_char_pos + 1
  @current_char_pos = position
  @current_char = @str[position, 1]
end
# Walks over the string one character at a time and yields every token found.
# Parentheses and commas are yielded directly as :lparen, :rparen and :comma;
# operators, quoted keywords and plain keywords are handed to their dedicated
# tokenize_* method together with the block.
#
# Reading starts at the current position pointer; tokenize rewinds that
# pointer before iterating.
def each_token(&block)
  while next_char
    case current_char
    when /^\s?$/
      # Whitespace produces no token. The pattern also matches the empty
      # string, which is skipped the same way.
      next
    when '(' then yield(:lparen)
    when ')' then yield(:rparen)
    when ',' then yield(:comma)
    when /\&|\||=|<|>|\^|!|~|-/
      tokenize_operator(&block)
    when '"'
      tokenize_quoted_keyword(&block)
    else
      tokenize_keyword(&block)
    end
  end
end
# Yields the token symbol for the operator starting at the current character.
# When the current and the following character together form a key of the
# OPERATORS hash, the following character is consumed as well; otherwise only
# the current character is used.
#
# The .to_s on peek_char guards against a ruby bug where nil values are
# returned from strings which have forced encoding.
# See https://github.com/wvanbergen/scoped_search/issues/33 for details.
def tokenize_operator(&block)
  single = current_char
  candidate = single + peek_char.to_s

  if OPERATORS.key?(candidate)
    next_char # consume the character that was peeked at
    yield(OPERATORS[candidate])
  else
    yield(OPERATORS[single])
  end
end
# Reads a keyword starting at the current character and yields it. Characters
# are consumed until the next one is whitespace, a parenthesis, a comma, or
# one of = ~ < > & |. If the downcased keyword is a key of the KEYWORDS hash,
# the mapped Symbol is yielded; otherwise the keyword is yielded as a String
# with its original casing.
def tokenize_keyword(&block)
  word = current_char
  while /[^=~<>\s\&\|\)\(,]/ =~ peek_char
    word << next_char
  end

  # Fall back to the raw keyword when it is not a reserved word.
  yield(KEYWORDS.fetch(word.downcase, word))
end
# Tokenizes a keyword that is quoted using double quotes and yields it as a
# single String. each_token calls this while the current character is the
# opening quote. Characters are consumed up to the closing quote, or up to the
# point where next_char returns nil if the quote is never closed.
#
# A backslash escapes the character that follows it, which allows double
# quotes inside the keyword. The yielded value is always a String; it is not
# looked up in KEYWORDS.
def tokenize_quoted_keyword(&block)
  # Start from a mutable copy: appending to a string literal fails when
  # string literals are frozen.
  keyword = ''.dup
  until next_char.nil? || current_char == '"'
    # The .to_s guards against next_char returning nil (see the forced
    # encoding issue, https://github.com/wvanbergen/scoped_search/issues/33);
    # appending nil would raise a TypeError.
    keyword << (current_char == "\\" ? next_char.to_s : current_char)
  end
  yield(keyword)
end
# Expose each_token under the name +each+ as well.
alias each each_token
| end |