Permalink
Browse files

Work on tokenizer a little bit.

  • Loading branch information...
Yehuda Katz authored and committed on Nov 30, 2009
1 parent f8e3331 commit 4a8da22609c3559a43704710ba711f9e0992c020
Showing with 44 additions and 86 deletions.
  1. +21 −41 irb/ruby-lex.rb
  2. +23 −45 irb/ruby-token.rb
View
@@ -93,7 +93,6 @@ def get_readed
def getc
while @rests.empty?
-# return nil unless buf_input
@rests.push nil unless buf_input
end
c = @rests.shift
@@ -235,7 +234,6 @@ def each_top_level_statement
unless l = lex
throw :TERM_INPUT if @line == ''
else
- #p l
@line.concat l
if @ltype or @continue or @indent > 0
next
@@ -284,8 +282,6 @@ def lex
end
def token
- # require "tracer"
- # Tracer.on
@prev_seek = @seek
@prev_line_no = @line_no
@prev_char_no = @char_no
@@ -301,7 +297,6 @@ def token
if @readed_auto_clean_up
get_readed
end
- # Tracer.off
tk
end
@@ -502,12 +497,12 @@ def lex_init()
catch(:RET) do
if @lex_state == EXPR_ARG
if @space_seen and peek(0) =~ /[0-9]/
- throw :RET, identify_number
+ throw :RET, identify_number(op)
else
@lex_state = EXPR_BEG
end
elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
- throw :RET, identify_number
+ throw :RET, identify_number(op)
else
@lex_state = EXPR_BEG
end
@@ -559,7 +554,6 @@ def lex_int2
@OP.def_rule("::") do
|op, io|
-# p @lex_state.id2name, @space_seen
if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
@lex_state = EXPR_BEG
Token(TkCOLON3)
@@ -591,11 +585,6 @@ def lex_int2
Token("^")
end
- # @OP.def_rules("^=") do
- # @lex_state = EXPR_BEG
- # Token(OP_ASGN, :^)
- # end
-
@OP.def_rules(",") do
|op, io|
@lex_state = EXPR_BEG
@@ -725,16 +714,6 @@ def lex_int2
end
end
- # @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
- # |op, io|
- # @indent += 1
- # @lex_state = EXPR_FNAME
- # # @lex_state = EXPR_END
- # # until @rests[0] == "\n" or @rests[0] == ";"
- # # rests.shift
- # # end
- # end
-
@OP.def_rule("") do
|op, io|
printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
@@ -847,7 +826,6 @@ def identify_identifier
@indent += 1
@indent_stack.push token_c
end
-# p @indent_stack
end
elsif DEINDENT_CLAUSE.include?(token)
@@ -885,7 +863,6 @@ def identify_identifier
def identify_here_document
ch = getc
-# if lt = PERCENT_LTYPE[ch]
if ch == "-"
ch = getc
indent = true
@@ -944,44 +921,46 @@ def identify_quotation
else
RubyLex.fail SyntaxError, "unknown type of %string"
end
-# if ch !~ /\W/
-# ungetc
-# next
-# end
- #@ltype = lt
@quoted = ch unless @quoted = PERCENT_PAREN[ch]
identify_string(lt, @quoted)
end
- def identify_number
+ def identify_number(op = "")
@lex_state = EXPR_END
+ value = op
+
if peek(0) == "0" && peek(1) !~ /[.eE]/
- getc
- case peek(0)
+ value << getc
+ case next_peek = peek(0)
when /[xX]/
ch = getc
+ value << ch
match = /[0-9a-fA-F_]/
when /[bB]/
ch = getc
+ value << ch
match = /[01_]/
when /[oO]/
ch = getc
+ value << ch
match = /[0-7_]/
when /[dD]/
ch = getc
+ value << ch
match = /[0-9_]/
when /[0-7]/
match = /[0-7_]/
when /[89]/
RubyLex.fail SyntaxError, "Illegal octal digit"
else
- return Token(TkINTEGER)
+ return Token(TkINTEGER, value)
end
len0 = true
non_digit = false
while ch = getc
+ value << ch
if match =~ ch
if ch == "_"
if non_digit
@@ -1004,14 +983,15 @@ def identify_number
break
end
end
- return Token(TkINTEGER)
+ return Token(TkINTEGER, value)
end
type = TkINTEGER
allow_point = true
allow_e = true
non_digit = false
while ch = getc
+ value << ch
case ch
when /[0-9]/
non_digit = false
@@ -1034,7 +1014,7 @@ def identify_number
end
type = TkFLOAT
if peek(0) =~ /[+-]/
- getc
+ value << getc
end
allow_e = false
allow_point = false
@@ -1047,7 +1027,7 @@ def identify_number
break
end
end
- Token(type)
+ Token(type, value)
end
def identify_string(ltype, quoted = ltype)
@@ -1092,17 +1072,17 @@ def identify_string(ltype, quoted = ltype)
def identify_comment
@ltype = "#"
+ val = ""
+
while ch = getc
-# if ch == "\\" #"
-# read_escape
-# end
if ch == "\n"
@ltype = nil
ungetc
break
end
+ val << ch
end
- return Token(TkCOMMENT)
+ return Token(TkCOMMENT, val)
end
def read_escape
View
@@ -18,43 +18,40 @@ module RubyToken
EXPR_DOT = :EXPR_DOT
EXPR_CLASS = :EXPR_CLASS
- # for ruby 1.4X
- if !defined?(Symbol)
- Symbol = Integer
- end
-
class Token
- def initialize(seek, line_no, char_no)
+ def initialize(seek, line_no, char_no, value = nil)
@seek = seek
@line_no = line_no
@char_no = char_no
end
def to_s
- vars = instance_variables - [:@seek, :@line_no, :@char_no]
- details = vars.map {|v| "#{v}:#{instance_variable_get(v)}" }.join(" ")
+ vars = instance_variables - [:@seek, :@line_no, :@char_no, "@seek", "@line_no", "@char_no"]
+ details = vars.map {|v| "#{v.to_s.sub(/^@/, '')}:#{instance_variable_get(v)}" }.join(" ")
"#{self.class.name} #{line_no}:#{char_no} #{details}"
end
alias inspect to_s
- attr :seek
- attr :line_no
- attr :char_no
+ attr_reader :seek, :line_no, :char_no
end
- class TkNode < Token
- def initialize(seek, line_no, char_no)
- super
+ class TkContents < Token
+ def initialize(seek, line_no, char_no, contents)
+ super(seek, line_no, char_no)
+ @contents = contents
end
- attr :node
+ attr_reader :contents
+ end
+
+ class TkNode < Token
end
class TkId < Token
def initialize(seek, line_no, char_no, name)
super(seek, line_no, char_no)
@name = name
end
- attr :name
+ attr_reader :name
end
class TkVal < Token
@@ -66,7 +63,7 @@ def initialize(seek, line_no, char_no, value = nil)
end
class TkOp < Token
- attr :name, true
+ attr_accessor :name
end
class TkOPASGN < TkOp
@@ -75,13 +72,13 @@ def initialize(seek, line_no, char_no, op)
op = TkReading2Token[op][0] unless op.kind_of?(Symbol)
@op = op
end
- attr :op
+ attr_reader :op
end
class TkUnknownChar < Token
def initialize(seek, line_no, char_no, id)
super(seek, line_no, char_no)
- @name = name
+ @name = id
end
attr :name
end
@@ -106,11 +103,7 @@ def Token(token, value = nil)
end
return Token(tk[0], value)
else
- if (token.ancestors & [TkId, TkVal, TkOPASGN, TkUnknownChar]).empty?
- token.new(@prev_seek, @prev_line_no, @prev_char_no)
- else
- token.new(@prev_seek, @prev_line_no, @prev_char_no, value)
- end
+ token.new(@prev_seek, @prev_line_no, @prev_char_no, value)
end
end
@@ -201,7 +194,6 @@ def Token(token, value = nil)
[:TkRSHFT, TkOp, ">>"],
[:TkCOLON2, TkOp],
[:TkCOLON3, TkOp],
-# [:OPASGN, TkOp], # +=, -= etc. #
[:TkASSOC, TkOp, "=>"],
[:TkQUESTION, TkOp, "?"], #?
[:TkCOLON, TkOp, ":"], #:
@@ -239,7 +231,7 @@ def Token(token, value = nil)
[:TkCOMMA, Token, ","],
[:TkSEMICOLON, Token, ";"],
- [:TkCOMMENT],
+ [:TkCOMMENT, TkContents],
[:TkRD_COMMENT],
[:TkSPACE],
[:TkNL],
@@ -255,27 +247,13 @@ def Token(token, value = nil)
TkReading2Token = {}
TkSymbol2Token = {}
- def RubyToken.def_token(token_n, super_token = Token, reading = nil, *opts)
- token_n = token_n.id2name if token_n.kind_of?(Symbol)
- if RubyToken.const_defined?(token_n)
- IRB.fail AlreadyDefinedToken, token_n
- end
- token_c = eval("class #{token_n} < #{super_token}; end; #{token_n}")
-
- if reading
- if TkReading2Token[reading]
- IRB.fail TkReading2TokenDuplicateError, token_n, reading
- end
- if opts.empty?
- TkReading2Token[reading] = [token_c]
- else
- TkReading2Token[reading] = [token_c].concat(opts)
- end
- end
- TkSymbol2Token[token_n.intern] = token_c
+ def self.def_token(token_n, super_token = Token, reading = nil, *opts)
+ token_c = const_set(token_n, Class.new(super_token))
+ TkReading2Token[reading] = [token_c].concat(opts) if reading
+ TkSymbol2Token[token_n] = token_c
end
- for defs in TokenDefinitions
+ TokenDefinitions.each do |defs|
def_token(*defs)
end
end

0 comments on commit 4a8da22

Please sign in to comment.