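Tokenizer cleanup: remove dead commented-out code, attach the literal text to number and comment tokens, and simplify token class definition.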

wycats · Nov 30, 2009 · 4a8da22 · 4a8da22
1 parent f8e3331
commit 4a8da22
Show file tree

Hide file tree

Showing 2 changed files with 44 additions and 86 deletions.
diff --git a/irb/ruby-lex.rb b/irb/ruby-lex.rb
@@ -93,7 +93,6 @@ def get_readed
 
   def getc
     while @rests.empty?
-#      return nil unless buf_input
       @rests.push nil unless buf_input
     end
     c = @rests.shift
@@ -235,7 +234,6 @@ def each_top_level_statement
           unless l = lex
             throw :TERM_INPUT if @line == ''
           else
-            #p l
             @line.concat l
             if @ltype or @continue or @indent > 0
               next
@@ -284,8 +282,6 @@ def lex
   end
 
   def token
-    #      require "tracer"
-    #      Tracer.on
     @prev_seek = @seek
     @prev_line_no = @line_no
     @prev_char_no = @char_no
@@ -301,7 +297,6 @@ def token
     if @readed_auto_clean_up
       get_readed
     end
-    #      Tracer.off
     tk
   end
 
@@ -502,12 +497,12 @@ def lex_init()
       catch(:RET) do
         if @lex_state == EXPR_ARG
           if @space_seen and peek(0) =~ /[0-9]/
-            throw :RET, identify_number
+            throw :RET, identify_number(op)
           else
             @lex_state = EXPR_BEG
           end
         elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
-          throw :RET, identify_number
+          throw :RET, identify_number(op)
         else
           @lex_state = EXPR_BEG
         end
@@ -559,7 +554,6 @@ def lex_int2
 
     @OP.def_rule("::") do
        |op, io|
-#      p @lex_state.id2name, @space_seen
       if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
         @lex_state = EXPR_BEG
         Token(TkCOLON3)
@@ -591,11 +585,6 @@ def lex_int2
       Token("^")
     end
 
-    #       @OP.def_rules("^=") do
-    #   @lex_state = EXPR_BEG
-    #   Token(OP_ASGN, :^)
-    #       end
-
     @OP.def_rules(",") do
       |op, io|
       @lex_state = EXPR_BEG
@@ -725,16 +714,6 @@ def lex_int2
       end
     end
 
-    #       @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
-    #   |op, io|
-    #   @indent += 1
-    #   @lex_state = EXPR_FNAME
-    # # @lex_state = EXPR_END
-    # # until @rests[0] == "\n" or @rests[0] == ";"
-    # #   rests.shift
-    # # end
-    #       end
-
     @OP.def_rule("") do
       |op, io|
       printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
@@ -847,7 +826,6 @@ def identify_identifier
                   @indent += 1
                   @indent_stack.push token_c
                 end
-#               p @indent_stack
               end
 
             elsif DEINDENT_CLAUSE.include?(token)
@@ -885,7 +863,6 @@ def identify_identifier
 
   def identify_here_document
     ch = getc
-#    if lt = PERCENT_LTYPE[ch]
     if ch == "-"
       ch = getc
       indent = true
@@ -944,44 +921,46 @@ def identify_quotation
     else
       RubyLex.fail SyntaxError, "unknown type of %string"
     end
-#     if ch !~ /\W/
-#       ungetc
-#       next
-#     end
-    #@ltype = lt
     @quoted = ch unless @quoted = PERCENT_PAREN[ch]
     identify_string(lt, @quoted)
   end
 
-  def identify_number
+  def identify_number(op = "")
     @lex_state = EXPR_END
 
+    value = op
+
     if peek(0) == "0" && peek(1) !~ /[.eE]/
-      getc
-      case peek(0)
+      value << getc
+      case next_peek = peek(0)
       when /[xX]/
         ch = getc
+        value << ch
         match = /[0-9a-fA-F_]/
       when /[bB]/
         ch = getc
+        value << ch
         match = /[01_]/
       when /[oO]/
         ch = getc
+        value << ch
         match = /[0-7_]/
       when /[dD]/
         ch = getc
+        value << ch
         match = /[0-9_]/
       when /[0-7]/
         match = /[0-7_]/
       when /[89]/
         RubyLex.fail SyntaxError, "Illegal octal digit"
       else
-        return Token(TkINTEGER)
+        return Token(TkINTEGER, value)
       end
 
       len0 = true
       non_digit = false
       while ch = getc
+        value << ch
         if match =~ ch
           if ch == "_"
             if non_digit
@@ -1004,14 +983,15 @@ def identify_number
           break
         end
       end
-      return Token(TkINTEGER)
+      return Token(TkINTEGER, value)
     end
 
     type = TkINTEGER
     allow_point = true
     allow_e = true
     non_digit = false
     while ch = getc
+      value << ch
       case ch
       when /[0-9]/
         non_digit = false
@@ -1034,7 +1014,7 @@ def identify_number
         end
         type = TkFLOAT
         if peek(0) =~ /[+-]/
-          getc
+          value << getc
         end
         allow_e = false
         allow_point = false
@@ -1047,7 +1027,7 @@ def identify_number
         break
       end
     end
-    Token(type)
+    Token(type, value)
   end
 
   def identify_string(ltype, quoted = ltype)
@@ -1092,17 +1072,17 @@ def identify_string(ltype, quoted = ltype)
   def identify_comment
     @ltype = "#"
 
+    val = ""
+
     while ch = getc
-#      if ch == "\\" #"
-#       read_escape
-#      end
       if ch == "\n"
         @ltype = nil
         ungetc
         break
       end
+      val << ch
     end
-    return Token(TkCOMMENT)
+    return Token(TkCOMMENT, val)
   end
 
   def read_escape

diff --git a/irb/ruby-token.rb b/irb/ruby-token.rb
@@ -18,43 +18,40 @@ module RubyToken
   EXPR_DOT = :EXPR_DOT
   EXPR_CLASS = :EXPR_CLASS
 
-  # for ruby 1.4X
-  if !defined?(Symbol)
-    Symbol = Integer
-  end
-
   class Token
-    def initialize(seek, line_no, char_no)
+    def initialize(seek, line_no, char_no, value = nil)
       @seek = seek
       @line_no = line_no
       @char_no = char_no
     end
 
     def to_s
-      vars = instance_variables - [:@seek, :@line_no, :@char_no]
-      details = vars.map {|v| "#{v}:#{instance_variable_get(v)}" }.join(" ")
+      vars = instance_variables - [:@seek, :@line_no, :@char_no, "@seek", "@line_no", "@char_no"]
+      details = vars.map {|v| "#{v.to_s.sub(/^@/, '')}:#{instance_variable_get(v)}" }.join(" ")
       "#{self.class.name} #{line_no}:#{char_no} #{details}"
     end
     alias inspect to_s
 
-    attr :seek
-    attr :line_no
-    attr :char_no
+    attr_reader :seek, :line_no, :char_no
   end
 
-  class TkNode < Token
-    def initialize(seek, line_no, char_no)
-      super
+  class TkContents < Token
+    def initialize(seek, line_no, char_no, contents)
+      super(seek, line_no, char_no)
+      @contents = contents
     end
-    attr :node
+    attr_reader :contents
+  end
+
+  class TkNode < Token
   end
 
   class TkId < Token
     def initialize(seek, line_no, char_no, name)
       super(seek, line_no, char_no)
       @name = name
     end
-    attr :name
+    attr_reader :name
   end
 
   class TkVal < Token
@@ -66,7 +63,7 @@ def initialize(seek, line_no, char_no, value = nil)
   end
 
   class TkOp < Token
-    attr :name, true
+    attr_accessor :name
   end
 
   class TkOPASGN < TkOp
@@ -75,13 +72,13 @@ def initialize(seek, line_no, char_no, op)
       op = TkReading2Token[op][0] unless op.kind_of?(Symbol)
       @op = op
     end
-    attr :op
+    attr_reader :op
   end
 
   class TkUnknownChar < Token
     def initialize(seek, line_no, char_no, id)
       super(seek, line_no, char_no)
-      @name = name
+      @name = id
     end
     attr :name
   end
@@ -106,11 +103,7 @@ def Token(token, value = nil)
       end
       return Token(tk[0], value)
     else
-      if (token.ancestors & [TkId, TkVal, TkOPASGN, TkUnknownChar]).empty?
-        token.new(@prev_seek, @prev_line_no, @prev_char_no)
-      else
-        token.new(@prev_seek, @prev_line_no, @prev_char_no, value)
-      end
+      token.new(@prev_seek, @prev_line_no, @prev_char_no, value)
     end
   end
 
@@ -201,7 +194,6 @@ def Token(token, value = nil)
     [:TkRSHFT,      TkOp,   ">>"],
     [:TkCOLON2,     TkOp],
     [:TkCOLON3,     TkOp],
-#   [:OPASGN,       TkOp],               # +=, -=  etc. #
     [:TkASSOC,      TkOp,   "=>"],
     [:TkQUESTION,   TkOp,   "?"],        #?
     [:TkCOLON,      TkOp,   ":"],        #:
@@ -239,7 +231,7 @@ def Token(token, value = nil)
     [:TkCOMMA,      Token,  ","],
     [:TkSEMICOLON,  Token,  ";"],
 
-    [:TkCOMMENT],
+    [:TkCOMMENT,    TkContents],
     [:TkRD_COMMENT],
     [:TkSPACE],
     [:TkNL],
@@ -255,27 +247,13 @@ def Token(token, value = nil)
   TkReading2Token = {}
   TkSymbol2Token = {}
 
-  def RubyToken.def_token(token_n, super_token = Token, reading = nil, *opts)
-    token_n = token_n.id2name if token_n.kind_of?(Symbol)
-    if RubyToken.const_defined?(token_n)
-      IRB.fail AlreadyDefinedToken, token_n
-    end
-    token_c = eval("class #{token_n} < #{super_token}; end; #{token_n}")
-
-    if reading
-      if TkReading2Token[reading]
-        IRB.fail TkReading2TokenDuplicateError, token_n, reading
-      end
-      if opts.empty?
-        TkReading2Token[reading] = [token_c]
-      else
-        TkReading2Token[reading] = [token_c].concat(opts)
-      end
-    end
-    TkSymbol2Token[token_n.intern] = token_c
+  def self.def_token(token_n, super_token = Token, reading = nil, *opts)
+    token_c = const_set(token_n, Class.new(super_token))
+    TkReading2Token[reading] = [token_c].concat(opts) if reading
+    TkSymbol2Token[token_n] = token_c
   end
 
-  for defs in TokenDefinitions
+  TokenDefinitions.each do |defs|
     def_token(*defs)
   end
 end