From 5d31c61328898be68ccc5ac18ba2dfb8b433486e Mon Sep 17 00:00:00 2001 From: Ilya Bylich Date: Tue, 21 May 2019 21:58:11 +0300 Subject: [PATCH] + ruby27.y: Added numbered parameters support. (#565) This commit tracks upstream commits - ruby/ruby@12acc75 - ruby/ruby@d0e25ed - ruby/ruby@964bbc1 - ruby/ruby@ae07b66 - ruby/ruby@6ca9e7c - ruby/ruby@b8f3be2 - ruby/ruby@bb4ac7a --- doc/AST_FORMAT.md | 34 ++++ lib/parser.rb | 1 + lib/parser/ast/processor.rb | 8 + lib/parser/builders/default.rb | 21 ++- lib/parser/context.rb | 8 + lib/parser/lexer.rl | 40 +++++ lib/parser/lexer/max_numparam_stack.rb | 42 +++++ lib/parser/messages.rb | 46 +++--- lib/parser/meta.rb | 1 + lib/parser/ruby27.y | 41 ++++- test/test_lexer.rb | 57 +++++++ test/test_parser.rb | 218 +++++++++++++++++++++++++ 12 files changed, 488 insertions(+), 29 deletions(-) create mode 100644 lib/parser/lexer/max_numparam_stack.rb diff --git a/doc/AST_FORMAT.md b/doc/AST_FORMAT.md index c9d5698ee..54af5f80e 100644 --- a/doc/AST_FORMAT.md +++ b/doc/AST_FORMAT.md @@ -1083,6 +1083,40 @@ However, the following code results in a parse error: def f(*a: b); end ~~~ +## Numbered parameters + +### Block with numbered parameters + +Ruby 2.7 introduced a feature called "numbered parameters". +Numbered and ordinary parameters are mutually exclusive, so if the block +has only numbered parameters it also has a different AST node. + +Note that the second child represents the total number of numbered parameters, i.e. the highest index used (3 for `@1 + @3`). 
+ +Format: + +~~~ +s(:numblock, + s(:send, nil, :proc), 3, + s(:send, + s(:numparam, 1), :+, + s(:numparam, 3))) +"proc { @1 + @3 }" + ~ begin ~ end + ~~~~~~~~~~~~~~~~ expression +~~~ + +### Numbered parameter + +Format: + +~~~ +(numparam 10) +"@10" + ~~~ name + ~~~ expression +~~~ + ## Send ### To self diff --git a/lib/parser.rb b/lib/parser.rb index ead0978c9..0a5698f62 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -67,6 +67,7 @@ module Source require 'parser/lexer/literal' require 'parser/lexer/stack_state' require 'parser/lexer/dedenter' + require 'parser/lexer/max_numparam_stack' module Builders require 'parser/builders/default' diff --git a/lib/parser/ast/processor.rb b/lib/parser/ast/processor.rb index f179637d6..2c04546e0 100644 --- a/lib/parser/ast/processor.rb +++ b/lib/parser/ast/processor.rb @@ -173,6 +173,14 @@ def on_send(node) alias on_block process_regular_node alias on_lambda process_regular_node + def on_numblock(node) + method_call, max_numparam, body = *node + + node.updated(nil, [ + process(method_call), max_numparam, process(body) + ]) + end + alias on_while process_regular_node alias on_while_post process_regular_node alias on_until process_regular_node diff --git a/lib/parser/builders/default.rb b/lib/parser/builders/default.rb index 961cb600d..851cae000 100644 --- a/lib/parser/builders/default.rb +++ b/lib/parser/builders/default.rb @@ -437,6 +437,11 @@ def cvar(token) variable_map(token)) end + def numparam(token) + n(:numparam, [ value(token).to_i ], + variable_map(token)) + end + def back_ref(token) n(:back_ref, [ value(token).to_sym ], token_map(token)) @@ -663,6 +668,10 @@ def args(begin_t, args, end_t, check_args=true) collection_map(begin_t, args, end_t)) end + def numargs(max_numparam) + n(:numargs, [ max_numparam ], nil) + end + def arg(name_t) n(:arg, [ value(name_t).to_sym ], variable_map(name_t)) @@ -835,15 +844,23 @@ def block(method_call, begin_t, args, body, end_t) diagnostic :error, :block_and_blockarg, nil, 
last_arg.loc.expression, [loc(begin_t)] end + + if args.type == :numargs + block_type = :numblock + args = args.children[0] + else + block_type = :block + end + if [:send, :csend, :index, :super, :zsuper, :lambda].include?(method_call.type) - n(:block, [ method_call, args, body ], + n(block_type, [ method_call, args, body ], block_map(method_call.loc.expression, begin_t, end_t)) else # Code like "return foo 1 do end" is reduced in a weird sequence. # Here, method_call is actually (return). actual_send, = *method_call block = - n(:block, [ actual_send, args, body ], + n(block_type, [ actual_send, args, body ], block_map(actual_send.loc.expression, begin_t, end_t)) n(method_call.type, [ block ], diff --git a/lib/parser/context.rb b/lib/parser/context.rb index aa583a5f2..18e2b765e 100644 --- a/lib/parser/context.rb +++ b/lib/parser/context.rb @@ -47,5 +47,13 @@ def class_definition_allowed? end alias module_definition_allowed? class_definition_allowed? alias dynamic_const_definition_allowed? class_definition_allowed? + + def in_block? + @stack.last == :block + end + + def in_lambda? 
+ @stack.last == :lambda + end end end diff --git a/lib/parser/lexer.rl b/lib/parser/lexer.rl index ba3518a5c..b3f2aac60 100644 --- a/lib/parser/lexer.rl +++ b/lib/parser/lexer.rl @@ -89,7 +89,10 @@ class Parser::Lexer REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze + NUMPARAM_MAX = 100 + attr_reader :source_buffer + attr_reader :max_numparam_stack attr_accessor :diagnostics attr_accessor :static_env @@ -176,6 +179,9 @@ class Parser::Lexer # State before =begin / =end block comment @cs_before_block_comment = self.class.lex_en_line_begin + + # Maximum numbered parameters stack + @max_numparam_stack = MaxNumparamStack.new end def source_buffer=(source_buffer) @@ -249,6 +255,10 @@ class Parser::Lexer @cond = @cond_stack.pop end + def max_numparam + @max_numparam_stack.top + end + def dedent_level # We erase @dedent_level as a precaution to avoid accidentally # using a stale value. @@ -1301,6 +1311,36 @@ class Parser::Lexer fnext *stack_pop; fbreak; }; + '@' [0-9]+ + => { + if @version < 27 + diagnostic :error, :ivar_name, { :name => tok } + end + + value = tok[1..-1] + + if value[0] == '0' + diagnostic :error, :leading_zero_in_numparam, nil, range(@ts, @te) + end + + if value.to_i > NUMPARAM_MAX + diagnostic :error, :too_large_numparam, nil, range(@ts, @te) + end + + if !@context.in_block? && !@context.in_lambda? + diagnostic :error, :numparam_outside_block, nil, range(@ts, @te) + end + + if !@max_numparam_stack.can_have_numparams? 
+ diagnostic :error, :ordinary_param_defined, nil, range(@ts, @te) + end + + @max_numparam_stack.register(value.to_i) + + emit(:tNUMPARAM, tok[1..-1]) + fnext *stack_pop; fbreak; + }; + instance_var_v => { if tok =~ /^@[0-9]/ diff --git a/lib/parser/lexer/max_numparam_stack.rb b/lib/parser/lexer/max_numparam_stack.rb new file mode 100644 index 000000000..c01708f32 --- /dev/null +++ b/lib/parser/lexer/max_numparam_stack.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +module Parser + + class Lexer::MaxNumparamStack + def initialize + @stack = [] + end + + def cant_have_numparams! + set(-1) + end + + def can_have_numparams? + top >= 0 + end + + def register(numparam) + set( [top, numparam].max ) + end + + def top + @stack.last + end + + def push + @stack.push(0) + end + + def pop + @stack.pop + end + + private + + def set(value) + @stack.pop + @stack.push(value) + end + end + +end diff --git a/lib/parser/messages.rb b/lib/parser/messages.rb index efd4957d0..a9f2ee89b 100644 --- a/lib/parser/messages.rb +++ b/lib/parser/messages.rb @@ -10,27 +10,31 @@ module Parser # MESSAGES = { # Lexer errors - :unicode_point_too_large => 'invalid Unicode codepoint (too large)', - :invalid_escape => 'invalid escape character syntax', - :incomplete_escape => 'incomplete character syntax', - :invalid_hex_escape => 'invalid hex escape', - :invalid_unicode_escape => 'invalid Unicode escape', - :unterminated_unicode => 'unterminated Unicode escape', - :escape_eof => 'escape sequence meets end of file', - :string_eof => 'unterminated string meets end of file', - :regexp_options => 'unknown regexp options: %{options}', - :cvar_name => "`%{name}' is not allowed as a class variable name", - :ivar_name => "`%{name}' is not allowed as an instance variable name", - :trailing_in_number => "trailing `%{character}' in number", - :empty_numeric => 'numeric literal without digits', - :invalid_octal => 'invalid octal digit', - :no_dot_digit_literal => 'no . 
floating literal anymore; put 0 before dot', - :bare_backslash => 'bare backslash only allowed before newline', - :unexpected => "unexpected `%{character}'", - :embedded_document => 'embedded document meets end of file (and they embark on a romantic journey)', - :heredoc_id_has_newline => 'here document identifier across newlines, never match', - :heredoc_id_ends_with_nl => 'here document identifier ends with a newline', - :unterminated_heredoc_id => 'unterminated heredoc id', + :unicode_point_too_large => 'invalid Unicode codepoint (too large)', + :invalid_escape => 'invalid escape character syntax', + :incomplete_escape => 'incomplete character syntax', + :invalid_hex_escape => 'invalid hex escape', + :invalid_unicode_escape => 'invalid Unicode escape', + :unterminated_unicode => 'unterminated Unicode escape', + :escape_eof => 'escape sequence meets end of file', + :string_eof => 'unterminated string meets end of file', + :regexp_options => 'unknown regexp options: %{options}', + :cvar_name => "`%{name}' is not allowed as a class variable name", + :ivar_name => "`%{name}' is not allowed as an instance variable name", + :trailing_in_number => "trailing `%{character}' in number", + :empty_numeric => 'numeric literal without digits', + :invalid_octal => 'invalid octal digit', + :no_dot_digit_literal => 'no . 
floating literal anymore; put 0 before dot', + :bare_backslash => 'bare backslash only allowed before newline', + :unexpected => "unexpected `%{character}'", + :embedded_document => 'embedded document meets end of file (and they embark on a romantic journey)', + :heredoc_id_has_newline => 'here document identifier across newlines, never match', + :heredoc_id_ends_with_nl => 'here document identifier ends with a newline', + :unterminated_heredoc_id => 'unterminated heredoc id', + :leading_zero_in_numparam => 'leading zero is not allowed as a numbered parameter', + :numparam_outside_block => 'numbered parameter outside block', + :too_large_numparam => 'too large numbered parameter', + :ordinary_param_defined => 'ordinary parameter is defined', # Lexer warnings :invalid_escape_use => 'invalid character syntax; use ?%{escape}', diff --git a/lib/parser/meta.rb b/lib/parser/meta.rb index 6f311650b..3fe224241 100644 --- a/lib/parser/meta.rb +++ b/lib/parser/meta.rb @@ -26,6 +26,7 @@ module class sclass def defs undef alias args ident root lambda indexasgn index procarg0 meth_ref restarg_expr blockarg_expr objc_kwarg objc_restarg objc_varargs + numargs numblock numparam ).map(&:to_sym).to_set.freeze end # Meta diff --git a/lib/parser/ruby27.y b/lib/parser/ruby27.y index 5a491dd47..ff8b28276 100644 --- a/lib/parser/ruby27.y +++ b/lib/parser/ruby27.y @@ -17,7 +17,7 @@ token kCLASS kMODULE kDEF kUNDEF kBEGIN kRESCUE kENSURE kEND kIF kUNLESS tWORDS_BEG tQWORDS_BEG tSYMBOLS_BEG tQSYMBOLS_BEG tSTRING_DBEG tSTRING_DVAR tSTRING_END tSTRING_DEND tSTRING tSYMBOL tNL tEH tCOLON tCOMMA tSPACE tSEMI tLAMBDA tLAMBEG tCHARACTER - tRATIONAL tIMAGINARY tLABEL_END tANDDOT tMETHREF tBDOT2 tBDOT3 + tRATIONAL tIMAGINARY tLABEL_END tANDDOT tMETHREF tBDOT2 tBDOT3 tNUMPARAM prechigh right tBANG tTILDE tUPLUS @@ -1460,14 +1460,17 @@ opt_block_args_tail: block_param_def: tPIPE opt_bv_decl tPIPE { + @lexer.max_numparam_stack.cant_have_numparams! 
result = @builder.args(val[0], val[1], val[2]) } | tOROP { + @lexer.max_numparam_stack.cant_have_numparams! result = @builder.args(val[0], [], val[0]) } | tPIPE block_param opt_bv_decl tPIPE { + @lexer.max_numparam_stack.cant_have_numparams! result = @builder.args(val[0], val[1].concat(val[2]), val[3]) } @@ -1498,26 +1501,34 @@ opt_block_args_tail: lambda: { @static_env.extend_dynamic + @lexer.max_numparam_stack.push + @context.push(:lambda) } f_larglist { + @context.pop @lexer.cmdarg.push(false) } lambda_body { - @lexer.cmdarg.pop - - result = [ val[1], val[3] ] + args = @lexer.max_numparam > 0 ? @builder.numargs(@lexer.max_numparam) : val[1] + result = [ args, val[3] ] + @lexer.max_numparam_stack.pop @static_env.unextend + @lexer.cmdarg.pop } f_larglist: tLPAREN2 f_args opt_bv_decl tRPAREN { + @lexer.max_numparam_stack.cant_have_numparams! result = @builder.args(val[0], val[1].concat(val[2]), val[3]) } | f_args { + if val[0].any? + @lexer.max_numparam_stack.cant_have_numparams! + end result = @builder.args(nil, val[0], nil) } @@ -1652,24 +1663,30 @@ opt_block_args_tail: brace_body: { @static_env.extend_dynamic + @lexer.max_numparam_stack.push } opt_block_param compstmt { - result = [ val[1], val[2] ] + args = @lexer.max_numparam > 0 ? @builder.numargs(@lexer.max_numparam) : val[1] + result = [ args, val[2] ] + @lexer.max_numparam_stack.pop @static_env.unextend } do_body: { @static_env.extend_dynamic + @lexer.max_numparam_stack.push } { @lexer.cmdarg.push(false) } opt_block_param bodystmt { - result = [ val[2], val[3] ] + args = @lexer.max_numparam > 0 ? 
@builder.numargs(@lexer.max_numparam) : val[2] + result = [ args, val[3] ] + @lexer.max_numparam_stack.pop @static_env.unextend @lexer.cmdarg.pop } @@ -1892,6 +1909,10 @@ regexp_contents: # nothing { result = @builder.cvar(val[0]) } + | tNUMPARAM + { + result = @builder.numparam(val[0]) + } | backref symbol: ssym @@ -1964,6 +1985,10 @@ regexp_contents: # nothing { result = @builder.cvar(val[0]) } + | tNUMPARAM + { + result = @builder.numparam(val[0]) + } keyword_variable: kNIL { @@ -2188,6 +2213,8 @@ keyword_variable: kNIL { @static_env.declare val[0][0] + @lexer.max_numparam_stack.cant_have_numparams! + result = val[0] } @@ -2220,6 +2247,8 @@ keyword_variable: kNIL @static_env.declare val[0][0] + @lexer.max_numparam_stack.cant_have_numparams! + result = val[0] } diff --git a/test/test_lexer.rb b/test/test_lexer.rb index ead74eddb..cb51aa35b 100644 --- a/test/test_lexer.rb +++ b/test/test_lexer.rb @@ -3581,4 +3581,61 @@ def test_meth_ref_unsupported_newlines :tPLUS, '+', [6, 7]) end + def lex_numbered_parameter(input) + @lex.max_numparam_stack.push + + @lex.context = Parser::Context.new + @lex.context.push(:block) + + source_buffer = Parser::Source::Buffer.new('(assert_lex_numbered_parameter)') + source_buffer.source = input + + @lex.source_buffer = source_buffer + + @lex.advance + end + + def assert_scanned_numbered_parameter(input) + lex_token, (lex_value, lex_range) = lex_numbered_parameter(input) + + assert_equal(lex_token, :tNUMPARAM) + assert_equal(lex_value, input.tr('@', '')) + assert_equal(lex_range.begin_pos, 0) + assert_equal(lex_range.end_pos, input.length) + end + + def refute_scanned_numbered_parameter(input, message = nil) + err = assert_raises Parser::SyntaxError do + lex_token, (lex_value, lex_range) = lex_numbered_parameter(input) + end + + if message + assert_equal(err.message, Parser::MESSAGES[message]) + + assert_equal(err.diagnostic.location.begin_pos, 0) + assert_equal(err.diagnostic.location.end_pos, input.length) + end + end + + def 
test_numbered_args_before_27 + setup_lexer(26) + refute_scanned_numbered_parameter('@1') + end + + def test_numbered_args_27 + setup_lexer(27) + assert_scanned_numbered_parameter('@1') + assert_equal(@lex.max_numparam, 1) + + setup_lexer(27) + assert_scanned_numbered_parameter('@100') + assert_equal(@lex.max_numparam, 100) + + setup_lexer(27) + refute_scanned_numbered_parameter('@101', :too_large_numparam) + + setup_lexer(27) + refute_scanned_numbered_parameter('@01', :leading_zero_in_numparam) + end + end diff --git a/test/test_parser.rb b/test/test_parser.rb index ceecdcd2e..454dabb31 100644 --- a/test/test_parser.rb +++ b/test/test_parser.rb @@ -7174,6 +7174,224 @@ def test_unterimated_heredoc_id__27 %q{^ location}, SINCE_2_7) end + end + + def test_numbered_args_before_27 + assert_diagnoses( + [:error, :ivar_name, { :name => '@1' }], + %q{m { @1 }}, + %q{ ^^ location}, + ALL_VERSIONS - SINCE_2_7 + ) + end + + def test_numbered_args_after_27 + assert_parses( + s(:numblock, + s(:send, nil, :m), + 15, + s(:send, + s(:numparam, 1), :+, + s(:numparam, 15))), + %q{m { @1 + @15 }}, + %q{^^^^^^^^^^^^^^ expression + | ^^ name (send/2.numparam/1) + | ^^ expression (send/2.numparam/1) + | ^^^ name (send/2.numparam/2) + | ^^^ expression (send/2.numparam/2)}, + SINCE_2_7) + + assert_parses( + s(:numblock, + s(:send, nil, :m), + 15, + s(:send, + s(:numparam, 1), :+, + s(:numparam, 15))), + %q{m do @1 + @15 end}, + %q{^^^^^^^^^^^^^^^^^ expression + | ^^ name (send/2.numparam/1) + | ^^ expression (send/2.numparam/1) + | ^^^ name (send/2.numparam/2) + | ^^^ expression (send/2.numparam/2)}, + SINCE_2_7) + + # Lambdas + + assert_parses( + s(:numblock, + s(:lambda), + 15, + s(:send, + s(:numparam, 1), :+, + s(:numparam, 15))), + %q{-> { @1 + @15}}, + %q{^^^^^^^^^^^^^^ expression + | ^^ name (send.numparam/1) + | ^^ expression (send.numparam/1) + | ^^^ name (send.numparam/2) + | ^^^ expression (send.numparam/2)}, + SINCE_2_7) + + assert_parses( + s(:numblock, + s(:lambda), + 15, + 
s(:send, + s(:numparam, 1), :+, + s(:numparam, 15))), + %q{-> do @1 + @15 end}, + %q{^^^^^^^^^^^^^^^^^^ expression + | ^^ name (send.numparam/1) + | ^^ expression (send.numparam/1) + | ^^^ name (send.numparam/2) + | ^^^ expression (send.numparam/2)}, + SINCE_2_7) + end + + def test_numbered_and_ordinary_parameters + # Blocks + + assert_diagnoses( + [:error, :ordinary_param_defined], + %q{m { || @1 } }, + %q{ ^^ location}, + SINCE_2_7) + + assert_diagnoses( + [:error, :ordinary_param_defined], + %q{m { |a| @1 } }, + %q{ ^^ location}, + SINCE_2_7) + + assert_diagnoses( + [:error, :ordinary_param_defined], + %q{m do || @1 end }, + %q{ ^^ location}, + SINCE_2_7) + + assert_diagnoses( + [:error, :ordinary_param_defined], + %q{m do |a, b| @1 end }, + %q{ ^^ location}, + SINCE_2_7) + + assert_diagnoses( + [:error, :ordinary_param_defined], + %q{m { |x = @1| }}, + %q{ ^^ location}, + SINCE_2_7) + + assert_diagnoses( + [:error, :ordinary_param_defined], + %q{m { |x: @1| }}, + %q{ ^^ location}, + SINCE_2_7) + + # Lambdas + + assert_diagnoses( + [:error, :ordinary_param_defined], + %q{->() { @1 } }, + %q{ ^^ location}, + SINCE_2_7) + + assert_diagnoses( + [:error, :ordinary_param_defined], + %q{->(a) { @1 } }, + %q{ ^^ location}, + SINCE_2_7) + + assert_diagnoses( + [:error, :ordinary_param_defined], + %q{->() do @1 end }, + %q{ ^^ location}, + SINCE_2_7) + + assert_diagnoses( + [:error, :ordinary_param_defined], + %q{->(a, b) do @1 end}, + %q{ ^^ location}, + SINCE_2_7) + + assert_diagnoses( + [:error, :ordinary_param_defined], + %q{->(x=@1) {}}, + %q{ ^^ location}, + SINCE_2_7) + + assert_diagnoses( + [:error, :ordinary_param_defined], + %q{->(x: @1) {}}, + %q{ ^^ location}, + SINCE_2_7) + + assert_diagnoses( + [:error, :ordinary_param_defined], + %q{proc {|;a| @1}}, + %q{ ^^ location}, + SINCE_2_7) + + assert_diagnoses( + [:error, :ordinary_param_defined], + "proc {|\n| @1}", + %q{ ^^ location}, + SINCE_2_7) + end + + def test_numparam_outside_block + assert_diagnoses( + 
[:error, :numparam_outside_block], + %q{class A; @1; end}, + %q{ ^^ location}, + SINCE_2_7) + + assert_diagnoses( + [:error, :numparam_outside_block], + %q{module A; @1; end}, + %q{ ^^ location}, + SINCE_2_7) + assert_diagnoses( + [:error, :numparam_outside_block], + %q{class << foo; @1; end}, + %q{ ^^ location}, + SINCE_2_7) + + assert_diagnoses( + [:error, :numparam_outside_block], + %q{def self.m; @1; end}, + %q{ ^^ location}, + SINCE_2_7) + end + + def test_ruby_bug_15789 + assert_parses( + s(:send, nil, :m, + s(:block, + s(:lambda), + s(:args, + s(:optarg, :a, + s(:numblock, + s(:lambda), 1, + s(:numparam, 1)))), + s(:lvar, :a))), + %q{m ->(a = ->{@1}) {a}}, + %q{}, + SINCE_2_7) + + assert_parses( + s(:send, nil, :m, + s(:block, + s(:lambda), + s(:args, + s(:kwoptarg, :a, + s(:numblock, + s(:lambda), 1, + s(:numparam, 1)))), + s(:lvar, :a))), + %q{m ->(a: ->{@1}) {a}}, + %q{}, + SINCE_2_7) end end