Skip to content

Commit

Permalink
Work on tokenizer a little bit.
Browse files Browse the repository at this point in the history
  • Loading branch information
Yehuda Katz authored and Yehuda Katz committed Nov 30, 2009
1 parent f8e3331 commit 4a8da22
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 86 deletions.
62 changes: 21 additions & 41 deletions irb/ruby-lex.rb
Expand Up @@ -93,7 +93,6 @@ def get_readed

def getc
while @rests.empty?
# return nil unless buf_input
@rests.push nil unless buf_input
end
c = @rests.shift
Expand Down Expand Up @@ -235,7 +234,6 @@ def each_top_level_statement
unless l = lex
throw :TERM_INPUT if @line == ''
else
#p l
@line.concat l
if @ltype or @continue or @indent > 0
next
Expand Down Expand Up @@ -284,8 +282,6 @@ def lex
end

def token
# require "tracer"
# Tracer.on
@prev_seek = @seek
@prev_line_no = @line_no
@prev_char_no = @char_no
Expand All @@ -301,7 +297,6 @@ def token
if @readed_auto_clean_up
get_readed
end
# Tracer.off
tk
end

Expand Down Expand Up @@ -502,12 +497,12 @@ def lex_init()
catch(:RET) do
if @lex_state == EXPR_ARG
if @space_seen and peek(0) =~ /[0-9]/
throw :RET, identify_number
throw :RET, identify_number(op)
else
@lex_state = EXPR_BEG
end
elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
throw :RET, identify_number
throw :RET, identify_number(op)
else
@lex_state = EXPR_BEG
end
Expand Down Expand Up @@ -559,7 +554,6 @@ def lex_int2

@OP.def_rule("::") do
|op, io|
# p @lex_state.id2name, @space_seen
if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
@lex_state = EXPR_BEG
Token(TkCOLON3)
Expand Down Expand Up @@ -591,11 +585,6 @@ def lex_int2
Token("^")
end

# @OP.def_rules("^=") do
# @lex_state = EXPR_BEG
# Token(OP_ASGN, :^)
# end

@OP.def_rules(",") do
|op, io|
@lex_state = EXPR_BEG
Expand Down Expand Up @@ -725,16 +714,6 @@ def lex_int2
end
end

# @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
# |op, io|
# @indent += 1
# @lex_state = EXPR_FNAME
# # @lex_state = EXPR_END
# # until @rests[0] == "\n" or @rests[0] == ";"
# # rests.shift
# # end
# end

@OP.def_rule("") do
|op, io|
printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
Expand Down Expand Up @@ -847,7 +826,6 @@ def identify_identifier
@indent += 1
@indent_stack.push token_c
end
# p @indent_stack
end

elsif DEINDENT_CLAUSE.include?(token)
Expand Down Expand Up @@ -885,7 +863,6 @@ def identify_identifier

def identify_here_document
ch = getc
# if lt = PERCENT_LTYPE[ch]
if ch == "-"
ch = getc
indent = true
Expand Down Expand Up @@ -944,44 +921,46 @@ def identify_quotation
else
RubyLex.fail SyntaxError, "unknown type of %string"
end
# if ch !~ /\W/
# ungetc
# next
# end
#@ltype = lt
@quoted = ch unless @quoted = PERCENT_PAREN[ch]
identify_string(lt, @quoted)
end

def identify_number
def identify_number(op = "")
@lex_state = EXPR_END

value = op

if peek(0) == "0" && peek(1) !~ /[.eE]/
getc
case peek(0)
value << getc
case next_peek = peek(0)
when /[xX]/
ch = getc
value << ch
match = /[0-9a-fA-F_]/
when /[bB]/
ch = getc
value << ch
match = /[01_]/
when /[oO]/
ch = getc
value << ch
match = /[0-7_]/
when /[dD]/
ch = getc
value << ch
match = /[0-9_]/
when /[0-7]/
match = /[0-7_]/
when /[89]/
RubyLex.fail SyntaxError, "Illegal octal digit"
else
return Token(TkINTEGER)
return Token(TkINTEGER, value)
end

len0 = true
non_digit = false
while ch = getc
value << ch
if match =~ ch
if ch == "_"
if non_digit
Expand All @@ -1004,14 +983,15 @@ def identify_number
break
end
end
return Token(TkINTEGER)
return Token(TkINTEGER, value)
end

type = TkINTEGER
allow_point = true
allow_e = true
non_digit = false
while ch = getc
value << ch
case ch
when /[0-9]/
non_digit = false
Expand All @@ -1034,7 +1014,7 @@ def identify_number
end
type = TkFLOAT
if peek(0) =~ /[+-]/
getc
value << getc
end
allow_e = false
allow_point = false
Expand All @@ -1047,7 +1027,7 @@ def identify_number
break
end
end
Token(type)
Token(type, value)
end

def identify_string(ltype, quoted = ltype)
Expand Down Expand Up @@ -1092,17 +1072,17 @@ def identify_string(ltype, quoted = ltype)
# Reads the rest of a "#" comment and returns it as a TkCOMMENT token that
# carries the comment text.
#
# Characters are pulled with getc and collected until a newline or the end of
# input. The newline is not part of the collected text; it is handed back via
# ungetc so the caller still sees it. @ltype is "#" while the comment is being
# read and is reset to nil once the terminating newline is found (it is left
# as "#" when input ends before a newline).
#
# Returns whatever Token(TkCOMMENT, text) builds.
def identify_comment
  @ltype = "#"
  text = ""

  while (ch = getc)
    if ch == "\n"
      @ltype = nil
      ungetc
      break
    end
    text << ch
  end

  # Pass the collected text along; returning a bare Token(TkCOMMENT) here
  # would throw the comment body away.
  Token(TkCOMMENT, text)
end

def read_escape
Expand Down
68 changes: 23 additions & 45 deletions irb/ruby-token.rb
Expand Up @@ -18,43 +18,40 @@ module RubyToken
EXPR_DOT = :EXPR_DOT
EXPR_CLASS = :EXPR_CLASS

# Compatibility shim kept "for ruby 1.4X": bind Symbol to Integer only when no
# Symbol constant is visible; otherwise this line does nothing.
Symbol = Integer unless defined?(Symbol)

# Base class for lexer tokens. Stores where the token was read from: the seek
# offset, the line number and the character position.
class Token
  # seek, line_no, char_no - position of the token in the input.
  # value - accepted so every token class can be constructed with the same
  #         four arguments (the Token() helper always passes one); Token
  #         itself does not store it.
  def initialize(seek, line_no, char_no, value = nil)
    @seek = seek
    @line_no = line_no
    @char_no = char_no
  end

  attr_reader :seek, :line_no, :char_no

  # Renders "<ClassName> <line_no>:<char_no> <details>", where details lists
  # every instance variable other than the position fields as "name:value"
  # (leading "@" removed), separated by single spaces.
  def to_s
    # Position fields are already shown as line:char. Both Symbol and String
    # spellings are excluded - presumably to cover Ruby versions in which
    # instance_variables returns strings.
    hidden = [:@seek, :@line_no, :@char_no, "@seek", "@line_no", "@char_no"]
    details = (instance_variables - hidden).map do |ivar|
      "#{ivar.to_s.sub(/^@/, '')}:#{instance_variable_get(ivar)}"
    end.join(" ")
    "#{self.class.name} #{line_no}:#{char_no} #{details}"
  end
  alias inspect to_s
end

# Token that carries the text it was built from, exposed through #contents.
# TkCOMMENT is declared with this class as its superclass in TokenDefinitions.
class TkContents < Token
  # seek, line_no, char_no - position of the token, forwarded to Token.
  # contents - the text to keep on the token.
  def initialize(seek, line_no, char_no, contents)
    super(seek, line_no, char_no)
    @contents = contents
  end

  attr_reader :contents
end

# Token subclass that adds nothing of its own; the constructor, readers and
# to_s all come from Token.
class TkNode < Token; end

# Token that carries a name, exposed through the read-only #name.
class TkId < Token
  # seek, line_no, char_no - position of the token, forwarded to Token.
  # name - the name to keep on the token.
  def initialize(seek, line_no, char_no, name)
    super(seek, line_no, char_no)
    @name = name
  end

  # Declared once; a second attr/attr_reader for the same name only redefines
  # the identical reader.
  attr_reader :name
end

class TkVal < Token
Expand All @@ -66,7 +63,7 @@ def initialize(seek, line_no, char_no, value = nil)
end

# Operator token. #name is readable and writable; the constructor is inherited
# from Token and does not set it.
class TkOp < Token
  # attr_accessor replaces the obsolete `attr :name, true` spelling of the
  # same reader/writer pair.
  attr_accessor :name
end

class TkOPASGN < TkOp
Expand All @@ -75,13 +72,13 @@ def initialize(seek, line_no, char_no, op)
op = TkReading2Token[op][0] unless op.kind_of?(Symbol)
@op = op
end
attr :op
attr_reader :op
end

# Token for a character the lexer has no token for; the fourth constructor
# argument is exposed through #name.
class TkUnknownChar < Token
  # seek, line_no, char_no - position of the token, forwarded to Token.
  # id - stored as the token's name.
  def initialize(seek, line_no, char_no, id)
    super(seek, line_no, char_no)
    # Store the argument itself. Assigning `name` here would read the
    # not-yet-set reader and store nil.
    @name = id
  end

  attr_reader :name
end
Expand All @@ -106,11 +103,7 @@ def Token(token, value = nil)
end
return Token(tk[0], value)
else
if (token.ancestors & [TkId, TkVal, TkOPASGN, TkUnknownChar]).empty?
token.new(@prev_seek, @prev_line_no, @prev_char_no)
else
token.new(@prev_seek, @prev_line_no, @prev_char_no, value)
end
token.new(@prev_seek, @prev_line_no, @prev_char_no, value)
end
end

Expand Down Expand Up @@ -201,7 +194,6 @@ def Token(token, value = nil)
[:TkRSHFT, TkOp, ">>"],
[:TkCOLON2, TkOp],
[:TkCOLON3, TkOp],
# [:OPASGN, TkOp], # +=, -= etc. #
[:TkASSOC, TkOp, "=>"],
[:TkQUESTION, TkOp, "?"], #?
[:TkCOLON, TkOp, ":"], #:
Expand Down Expand Up @@ -239,7 +231,7 @@ def Token(token, value = nil)
[:TkCOMMA, Token, ","],
[:TkSEMICOLON, Token, ";"],

[:TkCOMMENT],
[:TkCOMMENT, TkContents],
[:TkRD_COMMENT],
[:TkSPACE],
[:TkNL],
Expand All @@ -255,27 +247,13 @@ def Token(token, value = nil)
TkReading2Token = {}
TkSymbol2Token = {}

# Defines one token class on the enclosing module and registers it in the
# lookup tables.
#
# token_n     - name of the constant to create (a Symbol such as :TkCOMMA; a
#               String is accepted too).
# super_token - class the new token class inherits from (default: Token).
# reading     - optional source text for the token (e.g. ","); when given,
#               TkReading2Token[reading] is set to [token_class, *opts].
# opts        - extra values stored after the class in that entry.
#
# The class is created with const_set/Class.new, so no source string is built
# and evaluated. NOTE(review): this form does not reject a name or reading that
# was already registered - confirm that TokenDefinitions has no duplicates.
#
# Returns the new token class, also stored in TkSymbol2Token under the
# Symbol form of token_n.
def self.def_token(token_n, super_token = Token, reading = nil, *opts)
  token_c = const_set(token_n, Class.new(super_token))
  TkReading2Token[reading] = [token_c].concat(opts) if reading
  TkSymbol2Token[token_n.to_sym] = token_c
end

# Create one token class per row of TokenDefinitions; each row is splatted
# into def_token's (name, superclass, reading, *opts) parameters.
TokenDefinitions.each { |defs| def_token(*defs) }
end

0 comments on commit 4a8da22

Please sign in to comment.