From 313f28159b466bd05cc7c486dabdc4281a7bae3e Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Wed, 23 Aug 2023 11:42:15 +0100 Subject: [PATCH] Deprecate RubyLex and warn about references to it `RubyLex` has always been a private component of IRB, so we should explicitly discourage its use. Also, it should be placed under the `IRB` module like other components. --- lib/irb/ruby-lex.rb | 831 ++++++++++++++++---------------- test/irb/test_nesting_parser.rb | 4 +- test/irb/test_ruby_lex.rb | 24 +- 3 files changed, 436 insertions(+), 423 deletions(-) diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb index 085b08997..502883bd4 100644 --- a/lib/irb/ruby-lex.rb +++ b/lib/irb/ruby-lex.rb @@ -8,489 +8,494 @@ require "jruby" if RUBY_ENGINE == "jruby" require_relative "nesting_parser" -# :stopdoc: -class RubyLex - ASSIGNMENT_NODE_TYPES = [ - # Local, instance, global, class, constant, instance, and index assignment: - # "foo = bar", - # "@foo = bar", - # "$foo = bar", - # "@@foo = bar", - # "::Foo = bar", - # "a::Foo = bar", - # "Foo = bar" - # "foo.bar = 1" - # "foo[1] = bar" - :assign, - - # Operation assignment: - # "foo += bar" - # "foo -= bar" - # "foo ||= bar" - # "foo &&= bar" - :opassign, - - # Multiple assignment: - # "foo, bar = 1, 2 - :massign, - ] - - class TerminateLineInput < StandardError - def initialize - super("Terminate Line Input") +module IRB + # :stopdoc: + class RubyLex + ASSIGNMENT_NODE_TYPES = [ + # Local, instance, global, class, constant, instance, and index assignment: + # "foo = bar", + # "@foo = bar", + # "$foo = bar", + # "@@foo = bar", + # "::Foo = bar", + # "a::Foo = bar", + # "Foo = bar" + # "foo.bar = 1" + # "foo[1] = bar" + :assign, + + # Operation assignment: + # "foo += bar" + # "foo -= bar" + # "foo ||= bar" + # "foo &&= bar" + :opassign, + + # Multiple assignment: + # "foo, bar = 1, 2 + :massign, + ] + + class TerminateLineInput < StandardError + def initialize + super("Terminate Line Input") + end end - end - attr_reader 
:line_no - - def initialize(context) - @context = context - @line_no = 1 - @prompt = nil - end + attr_reader :line_no - def self.compile_with_errors_suppressed(code, line_no: 1) - begin - result = yield code, line_no - rescue ArgumentError - # Ruby can issue an error for the code if there is an - # incomplete magic comment for encoding in it. Force an - # expression with a new line before the code in this - # case to prevent magic comment handling. To make sure - # line numbers in the lexed code remain the same, - # decrease the line number by one. - code = ";\n#{code}" - line_no -= 1 - result = yield code, line_no + def initialize(context) + @context = context + @line_no = 1 + @prompt = nil end - result - end - def set_prompt(&block) - @prompt = block - end + def self.compile_with_errors_suppressed(code, line_no: 1) + begin + result = yield code, line_no + rescue ArgumentError + # Ruby can issue an error for the code if there is an + # incomplete magic comment for encoding in it. Force an + # expression with a new line before the code in this + # case to prevent magic comment handling. To make sure + # line numbers in the lexed code remain the same, + # decrease the line number by one. + code = ";\n#{code}" + line_no -= 1 + result = yield code, line_no + end + result + end - ERROR_TOKENS = [ - :on_parse_error, - :compile_error, - :on_assign_error, - :on_alias_error, - :on_class_name_error, - :on_param_error - ] - - def self.generate_local_variables_assign_code(local_variables) - "#{local_variables.join('=')}=nil;" unless local_variables.empty? - end + def set_prompt(&block) + @prompt = block + end - # Some part of the code is not included in Ripper's token. - # Example: DATA part, token after heredoc_beg when heredoc has unclosed embexpr. - # With interpolated tokens, tokens.map(&:tok).join will be equal to code. 
- def self.interpolate_ripper_ignored_tokens(code, tokens) - line_positions = [0] - code.lines.each do |line| - line_positions << line_positions.last + line.bytesize + ERROR_TOKENS = [ + :on_parse_error, + :compile_error, + :on_assign_error, + :on_alias_error, + :on_class_name_error, + :on_param_error + ] + + def self.generate_local_variables_assign_code(local_variables) + "#{local_variables.join('=')}=nil;" unless local_variables.empty? end - prev_byte_pos = 0 - interpolated = [] - prev_line = 1 - tokens.each do |t| - line, col = t.pos - byte_pos = line_positions[line - 1] + col - if prev_byte_pos < byte_pos - tok = code.byteslice(prev_byte_pos...byte_pos) + + # Some part of the code is not included in Ripper's token. + # Example: DATA part, token after heredoc_beg when heredoc has unclosed embexpr. + # With interpolated tokens, tokens.map(&:tok).join will be equal to code. + def self.interpolate_ripper_ignored_tokens(code, tokens) + line_positions = [0] + code.lines.each do |line| + line_positions << line_positions.last + line.bytesize + end + prev_byte_pos = 0 + interpolated = [] + prev_line = 1 + tokens.each do |t| + line, col = t.pos + byte_pos = line_positions[line - 1] + col + if prev_byte_pos < byte_pos + tok = code.byteslice(prev_byte_pos...byte_pos) + pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]] + interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0) + prev_line += tok.count("\n") + end + interpolated << t + prev_byte_pos = byte_pos + t.tok.bytesize + prev_line += t.tok.count("\n") + end + if prev_byte_pos < code.bytesize + tok = code.byteslice(prev_byte_pos..) 
pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]] interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0) - prev_line += tok.count("\n") end - interpolated << t - prev_byte_pos = byte_pos + t.tok.bytesize - prev_line += t.tok.count("\n") + interpolated end - if prev_byte_pos < code.bytesize - tok = code.byteslice(prev_byte_pos..) - pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]] - interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0) - end - interpolated - end - def self.ripper_lex_without_warning(code, context: nil) - verbose, $VERBOSE = $VERBOSE, nil - lvars_code = generate_local_variables_assign_code(context&.local_variables || []) - original_code = code - if lvars_code - code = "#{lvars_code}\n#{code}" - line_no = 0 - else - line_no = 1 - end + def self.ripper_lex_without_warning(code, context: nil) + verbose, $VERBOSE = $VERBOSE, nil + lvars_code = generate_local_variables_assign_code(context&.local_variables || []) + original_code = code + if lvars_code + code = "#{lvars_code}\n#{code}" + line_no = 0 + else + line_no = 1 + end - compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no| - lexer = Ripper::Lexer.new(inner_code, '-', line_no) - tokens = [] - lexer.scan.each do |t| - next if t.pos.first == 0 - prev_tk = tokens.last - position_overlapped = prev_tk && t.pos[0] == prev_tk.pos[0] && t.pos[1] < prev_tk.pos[1] + prev_tk.tok.bytesize - if position_overlapped - tokens[-1] = t if ERROR_TOKENS.include?(prev_tk.event) && !ERROR_TOKENS.include?(t.event) - else - tokens << t + compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no| + lexer = Ripper::Lexer.new(inner_code, '-', line_no) + tokens = [] + lexer.scan.each do |t| + next if t.pos.first == 0 + prev_tk = tokens.last + position_overlapped = prev_tk && t.pos[0] == prev_tk.pos[0] && t.pos[1] < prev_tk.pos[1] + prev_tk.tok.bytesize + if position_overlapped + tokens[-1] = t if 
ERROR_TOKENS.include?(prev_tk.event) && !ERROR_TOKENS.include?(t.event) + else + tokens << t + end end + interpolate_ripper_ignored_tokens(original_code, tokens) end - interpolate_ripper_ignored_tokens(original_code, tokens) + ensure + $VERBOSE = verbose end - ensure - $VERBOSE = verbose - end - def prompt(opens, continue, line_num_offset) - ltype = ltype_from_open_tokens(opens) - indent_level = calc_indent_level(opens) - @prompt&.call(ltype, indent_level, opens.any? || continue, @line_no + line_num_offset) - end + def prompt(opens, continue, line_num_offset) + ltype = ltype_from_open_tokens(opens) + indent_level = calc_indent_level(opens) + @prompt&.call(ltype, indent_level, opens.any? || continue, @line_no + line_num_offset) + end - def check_code_state(code) - tokens = self.class.ripper_lex_without_warning(code, context: @context) - opens = IRB::NestingParser.open_tokens(tokens) - [tokens, opens, code_terminated?(code, tokens, opens)] - end + def check_code_state(code) + tokens = self.class.ripper_lex_without_warning(code, context: @context) + opens = NestingParser.open_tokens(tokens) + [tokens, opens, code_terminated?(code, tokens, opens)] + end - def code_terminated?(code, tokens, opens) - case check_code_syntax(code) - when :unrecoverable_error - true - when :recoverable_error - false - when :other_error - opens.empty? && !should_continue?(tokens) - when :valid - !should_continue?(tokens) + def code_terminated?(code, tokens, opens) + case check_code_syntax(code) + when :unrecoverable_error + true + when :recoverable_error + false + when :other_error + opens.empty? && !should_continue?(tokens) + when :valid + !should_continue?(tokens) + end end - end - def save_prompt_to_context_io(opens, continue, line_num_offset) - # Implicitly saves prompt string to `@context.io.prompt`. This will be used in the next `@input.call`. 
- prompt(opens, continue, line_num_offset) - end + def save_prompt_to_context_io(opens, continue, line_num_offset) + # Implicitly saves prompt string to `@context.io.prompt`. This will be used in the next `@input.call`. + prompt(opens, continue, line_num_offset) + end - def increase_line_no(addition) - @line_no += addition - end + def increase_line_no(addition) + @line_no += addition + end - def assignment_expression?(code) - # Try to parse the code and check if the last of possibly multiple - # expressions is an assignment type. - - # If the expression is invalid, Ripper.sexp should return nil which will - # result in false being returned. Any valid expression should return an - # s-expression where the second element of the top level array is an - # array of parsed expressions. The first element of each expression is the - # expression's type. - verbose, $VERBOSE = $VERBOSE, nil - code = "#{RubyLex.generate_local_variables_assign_code(@context.local_variables) || 'nil;'}\n#{code}" - # Get the last node_type of the line. drop(1) is to ignore the local_variables_assign_code part. - node_type = Ripper.sexp(code)&.dig(1)&.drop(1)&.dig(-1, 0) - ASSIGNMENT_NODE_TYPES.include?(node_type) - ensure - $VERBOSE = verbose - end + def assignment_expression?(code) + # Try to parse the code and check if the last of possibly multiple + # expressions is an assignment type. - def should_continue?(tokens) - # Look at the last token and check if IRB need to continue reading next line. 
- # Example code that should continue: `a\` `a +` `a.` - # Trailing spaces, newline, comments are skipped - return true if tokens.last&.event == :on_sp && tokens.last.tok == "\\\n" - - tokens.reverse_each do |token| - case token.event - when :on_sp, :on_nl, :on_ignored_nl, :on_comment, :on_embdoc_beg, :on_embdoc, :on_embdoc_end - # Skip - when :on_regexp_end, :on_heredoc_end, :on_semicolon - # State is EXPR_BEG but should not continue - return false - else - # Endless range should not continue - return false if token.event == :on_op && token.tok.match?(/\A\.\.\.?\z/) + # If the expression is invalid, Ripper.sexp should return nil which will + # result in false being returned. Any valid expression should return an + # s-expression where the second element of the top level array is an + # array of parsed expressions. The first element of each expression is the + # expression's type. + verbose, $VERBOSE = $VERBOSE, nil + code = "#{RubyLex.generate_local_variables_assign_code(@context.local_variables) || 'nil;'}\n#{code}" + # Get the last node_type of the line. drop(1) is to ignore the local_variables_assign_code part. + node_type = Ripper.sexp(code)&.dig(1)&.drop(1)&.dig(-1, 0) + ASSIGNMENT_NODE_TYPES.include?(node_type) + ensure + $VERBOSE = verbose + end - # EXPR_DOT and most of the EXPR_BEG should continue - return token.state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_DOT) + def should_continue?(tokens) + # Look at the last token and check if IRB need to continue reading next line. 
+ # Example code that should continue: `a\` `a +` `a.` + # Trailing spaces, newline, comments are skipped + return true if tokens.last&.event == :on_sp && tokens.last.tok == "\\\n" + + tokens.reverse_each do |token| + case token.event + when :on_sp, :on_nl, :on_ignored_nl, :on_comment, :on_embdoc_beg, :on_embdoc, :on_embdoc_end + # Skip + when :on_regexp_end, :on_heredoc_end, :on_semicolon + # State is EXPR_BEG but should not continue + return false + else + # Endless range should not continue + return false if token.event == :on_op && token.tok.match?(/\A\.\.\.?\z/) + + # EXPR_DOT and most of the EXPR_BEG should continue + return token.state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_DOT) + end end + false end - false - end - def check_code_syntax(code) - lvars_code = RubyLex.generate_local_variables_assign_code(@context.local_variables) - code = "#{lvars_code}\n#{code}" + def check_code_syntax(code) + lvars_code = RubyLex.generate_local_variables_assign_code(@context.local_variables) + code = "#{lvars_code}\n#{code}" - begin # check if parser error are available - verbose, $VERBOSE = $VERBOSE, nil - case RUBY_ENGINE - when 'ruby' - self.class.compile_with_errors_suppressed(code) do |inner_code, line_no| - RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no) + begin # check if parser error are available + verbose, $VERBOSE = $VERBOSE, nil + case RUBY_ENGINE + when 'ruby' + self.class.compile_with_errors_suppressed(code) do |inner_code, line_no| + RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no) + end + when 'jruby' + JRuby.compile_ir(code) + else + catch(:valid) do + eval("BEGIN { throw :valid, true }\n#{code}") + false + end end - when 'jruby' - JRuby.compile_ir(code) - else - catch(:valid) do - eval("BEGIN { throw :valid, true }\n#{code}") - false + rescue EncodingError + # This is for a hash with invalid encoding symbol, {"\xAE": 1} + :unrecoverable_error + rescue SyntaxError => e + case e.message + when /unterminated 
(?:string|regexp) meets end of file/ + # "unterminated regexp meets end of file" + # + # example: + # / + # + # "unterminated string meets end of file" + # + # example: + # ' + return :recoverable_error + when /syntax error, unexpected end-of-input/ + # "syntax error, unexpected end-of-input, expecting keyword_end" + # + # example: + # if true + # hoge + # if false + # fuga + # end + return :recoverable_error + when /syntax error, unexpected keyword_end/ + # "syntax error, unexpected keyword_end" + # + # example: + # if ( + # end + # + # example: + # end + return :unrecoverable_error + when /syntax error, unexpected '\.'/ + # "syntax error, unexpected '.'" + # + # example: + # . + return :unrecoverable_error + when /unexpected tREGEXP_BEG/ + # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('" + # + # example: + # method / f / + return :unrecoverable_error + else + return :other_error end + ensure + $VERBOSE = verbose end - rescue EncodingError - # This is for a hash with invalid encoding symbol, {"\xAE": 1} - :unrecoverable_error - rescue SyntaxError => e - case e.message - when /unterminated (?:string|regexp) meets end of file/ - # "unterminated regexp meets end of file" - # - # example: - # / - # - # "unterminated string meets end of file" - # - # example: - # ' - return :recoverable_error - when /syntax error, unexpected end-of-input/ - # "syntax error, unexpected end-of-input, expecting keyword_end" - # - # example: - # if true - # hoge - # if false - # fuga - # end - return :recoverable_error - when /syntax error, unexpected keyword_end/ - # "syntax error, unexpected keyword_end" - # - # example: - # if ( - # end - # - # example: - # end - return :unrecoverable_error - when /syntax error, unexpected '\.'/ - # "syntax error, unexpected '.'" - # - # example: - # . 
- return :unrecoverable_error - when /unexpected tREGEXP_BEG/ - # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('" - # - # example: - # method / f / - return :unrecoverable_error - else - return :other_error - end - ensure - $VERBOSE = verbose + :valid end - :valid - end - def calc_indent_level(opens) - indent_level = 0 - opens.each_with_index do |t, index| - case t.event - when :on_heredoc_beg - if opens[index + 1]&.event != :on_heredoc_beg - if t.tok.match?(/^<<[~-]/) - indent_level += 1 - else - indent_level = 0 + def calc_indent_level(opens) + indent_level = 0 + opens.each_with_index do |t, index| + case t.event + when :on_heredoc_beg + if opens[index + 1]&.event != :on_heredoc_beg + if t.tok.match?(/^<<[~-]/) + indent_level += 1 + else + indent_level = 0 + end end + when :on_tstring_beg, :on_regexp_beg, :on_symbeg, :on_backtick + # No indent: "", //, :"", `` + # Indent: %(), %r(), %i(), %x() + indent_level += 1 if t.tok.start_with? '%' + when :on_embdoc_beg + indent_level = 0 + else + indent_level += 1 end - when :on_tstring_beg, :on_regexp_beg, :on_symbeg, :on_backtick - # No indent: "", //, :"", `` - # Indent: %(), %r(), %i(), %x() - indent_level += 1 if t.tok.start_with? 
'%' - when :on_embdoc_beg - indent_level = 0 - else - indent_level += 1 end + indent_level end - indent_level - end - FREE_INDENT_TOKENS = %i[on_tstring_beg on_backtick on_regexp_beg on_symbeg] + FREE_INDENT_TOKENS = %i[on_tstring_beg on_backtick on_regexp_beg on_symbeg] - def free_indent_token?(token) - FREE_INDENT_TOKENS.include?(token&.event) - end - - # Calculates the difference of pasted code's indent and indent calculated from tokens - def indent_difference(lines, line_results, line_index) - loop do - _tokens, prev_opens, _next_opens, min_depth = line_results[line_index] - open_token = prev_opens.last - if !open_token || (open_token.event != :on_heredoc_beg && !free_indent_token?(open_token)) - # If the leading whitespace is an indent, return the difference - indent_level = calc_indent_level(prev_opens.take(min_depth)) - calculated_indent = 2 * indent_level - actual_indent = lines[line_index][/^ */].size - return actual_indent - calculated_indent - elsif open_token.event == :on_heredoc_beg && open_token.tok.match?(/^<<[^-~]/) - return 0 - end - # If the leading whitespace is not an indent but part of a multiline token - # Calculate base_indent of the multiline token's beginning line - line_index = open_token.pos[0] - 1 + def free_indent_token?(token) + FREE_INDENT_TOKENS.include?(token&.event) end - end - def process_indent_level(tokens, lines, line_index, is_newline) - line_results = IRB::NestingParser.parse_by_line(tokens) - result = line_results[line_index] - if result - _tokens, prev_opens, next_opens, min_depth = result - else - # When last line is empty - prev_opens = next_opens = line_results.last[2] - min_depth = next_opens.size + # Calculates the difference of pasted code's indent and indent calculated from tokens + def indent_difference(lines, line_results, line_index) + loop do + _tokens, prev_opens, _next_opens, min_depth = line_results[line_index] + open_token = prev_opens.last + if !open_token || (open_token.event != :on_heredoc_beg && 
!free_indent_token?(open_token)) + # If the leading whitespace is an indent, return the difference + indent_level = calc_indent_level(prev_opens.take(min_depth)) + calculated_indent = 2 * indent_level + actual_indent = lines[line_index][/^ */].size + return actual_indent - calculated_indent + elsif open_token.event == :on_heredoc_beg && open_token.tok.match?(/^<<[^-~]/) + return 0 + end + # If the leading whitespace is not an indent but part of a multiline token + # Calculate base_indent of the multiline token's beginning line + line_index = open_token.pos[0] - 1 + end end - # To correctly indent line like `end.map do`, we use shortest open tokens on each line for indent calculation. - # Shortest open tokens can be calculated by `opens.take(min_depth)` - indent = 2 * calc_indent_level(prev_opens.take(min_depth)) + def process_indent_level(tokens, lines, line_index, is_newline) + line_results = NestingParser.parse_by_line(tokens) + result = line_results[line_index] + if result + _tokens, prev_opens, next_opens, min_depth = result + else + # When last line is empty + prev_opens = next_opens = line_results.last[2] + min_depth = next_opens.size + end - preserve_indent = lines[line_index - (is_newline ? 1 : 0)][/^ */].size + # To correctly indent line like `end.map do`, we use shortest open tokens on each line for indent calculation. + # Shortest open tokens can be calculated by `opens.take(min_depth)` + indent = 2 * calc_indent_level(prev_opens.take(min_depth)) - prev_open_token = prev_opens.last - next_open_token = next_opens.last + preserve_indent = lines[line_index - (is_newline ? 
1 : 0)][/^ */].size - # Calculates base indent for pasted code on the line where prev_open_token is located - # irb(main):001:1* if a # base_indent is 2, indent calculated from tokens is 0 - # irb(main):002:1* if b # base_indent is 6, indent calculated from tokens is 2 - # irb(main):003:0> c # base_indent is 6, indent calculated from tokens is 4 - if prev_open_token - base_indent = [0, indent_difference(lines, line_results, prev_open_token.pos[0] - 1)].max - else - base_indent = 0 - end + prev_open_token = prev_opens.last + next_open_token = next_opens.last - if free_indent_token?(prev_open_token) - if is_newline && prev_open_token.pos[0] == line_index - # First newline inside free-indent token - base_indent + indent + # Calculates base indent for pasted code on the line where prev_open_token is located + # irb(main):001:1* if a # base_indent is 2, indent calculated from tokens is 0 + # irb(main):002:1* if b # base_indent is 6, indent calculated from tokens is 2 + # irb(main):003:0> c # base_indent is 6, indent calculated from tokens is 4 + if prev_open_token + base_indent = [0, indent_difference(lines, line_results, prev_open_token.pos[0] - 1)].max else - # Accept any number of indent inside free-indent token - preserve_indent + base_indent = 0 end - elsif prev_open_token&.event == :on_embdoc_beg || next_open_token&.event == :on_embdoc_beg - if prev_open_token&.event == next_open_token&.event - # Accept any number of indent inside embdoc content - preserve_indent - else - # =begin or =end - 0 - end - elsif prev_open_token&.event == :on_heredoc_beg - tok = prev_open_token.tok - if prev_opens.size <= next_opens.size - if is_newline && lines[line_index].empty? && line_results[line_index - 1][1].last != next_open_token - # First line in heredoc - tok.match?(/^<<[-~]/) ? 
base_indent + indent : indent - elsif tok.match?(/^<<~/) - # Accept extra indent spaces inside `<<~` heredoc - [base_indent + indent, preserve_indent].max + + if free_indent_token?(prev_open_token) + if is_newline && prev_open_token.pos[0] == line_index + # First newline inside free-indent token + base_indent + indent else - # Accept any number of indent inside other heredoc + # Accept any number of indent inside free-indent token preserve_indent end + elsif prev_open_token&.event == :on_embdoc_beg || next_open_token&.event == :on_embdoc_beg + if prev_open_token&.event == next_open_token&.event + # Accept any number of indent inside embdoc content + preserve_indent + else + # =begin or =end + 0 + end + elsif prev_open_token&.event == :on_heredoc_beg + tok = prev_open_token.tok + if prev_opens.size <= next_opens.size + if is_newline && lines[line_index].empty? && line_results[line_index - 1][1].last != next_open_token + # First line in heredoc + tok.match?(/^<<[-~]/) ? base_indent + indent : indent + elsif tok.match?(/^<<~/) + # Accept extra indent spaces inside `<<~` heredoc + [base_indent + indent, preserve_indent].max + else + # Accept any number of indent inside other heredoc + preserve_indent + end + else + # Heredoc close + prev_line_indent_level = calc_indent_level(prev_opens) + tok.match?(/^<<[~-]/) ? base_indent + 2 * (prev_line_indent_level - 1) : 0 + end else - # Heredoc close - prev_line_indent_level = calc_indent_level(prev_opens) - tok.match?(/^<<[~-]/) ? 
base_indent + 2 * (prev_line_indent_level - 1) : 0 + base_indent + indent end - else - base_indent + indent end - end - LTYPE_TOKENS = %i[ - on_heredoc_beg on_tstring_beg - on_regexp_beg on_symbeg on_backtick - on_symbols_beg on_qsymbols_beg - on_words_beg on_qwords_beg - ] + LTYPE_TOKENS = %i[ + on_heredoc_beg on_tstring_beg + on_regexp_beg on_symbeg on_backtick + on_symbols_beg on_qsymbols_beg + on_words_beg on_qwords_beg + ] - def ltype_from_open_tokens(opens) - start_token = opens.reverse_each.find do |tok| - LTYPE_TOKENS.include?(tok.event) - end - return nil unless start_token - - case start_token&.event - when :on_tstring_beg - case start_token&.tok - when ?" then ?" - when /^%.$/ then ?" - when /^%Q.$/ then ?" - when ?' then ?' - when /^%q.$/ then ?' + def ltype_from_open_tokens(opens) + start_token = opens.reverse_each.find do |tok| + LTYPE_TOKENS.include?(tok.event) end - when :on_regexp_beg then ?/ - when :on_symbeg then ?: - when :on_backtick then ?` - when :on_qwords_beg then ?] - when :on_words_beg then ?] - when :on_qsymbols_beg then ?] - when :on_symbols_beg then ?] - when :on_heredoc_beg - start_token&.tok =~ /<<[-~]?(['"`])\w+\1/ - $1 || ?" - else - nil - end - end - - def check_termination_in_prev_line(code) - tokens = self.class.ripper_lex_without_warning(code, context: @context) - past_first_newline = false - index = tokens.rindex do |t| - # traverse first token before last line - if past_first_newline - if t.tok.include?("\n") - true + return nil unless start_token + + case start_token&.event + when :on_tstring_beg + case start_token&.tok + when ?" then ?" + when /^%.$/ then ?" + when /^%Q.$/ then ?" + when ?' then ?' + when /^%q.$/ then ?' end - elsif t.tok.include?("\n") - past_first_newline = true - false + when :on_regexp_beg then ?/ + when :on_symbeg then ?: + when :on_backtick then ?` + when :on_qwords_beg then ?] + when :on_words_beg then ?] + when :on_qsymbols_beg then ?] + when :on_symbols_beg then ?] 
+ when :on_heredoc_beg + start_token&.tok =~ /<<[-~]?(['"`])\w+\1/ + $1 || ?" else - false + nil end end - if index - first_token = nil - last_line_tokens = tokens[(index + 1)..(tokens.size - 1)] - last_line_tokens.each do |t| - unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event) - first_token = t - break + def check_termination_in_prev_line(code) + tokens = self.class.ripper_lex_without_warning(code, context: @context) + past_first_newline = false + index = tokens.rindex do |t| + # traverse first token before last line + if past_first_newline + if t.tok.include?("\n") + true + end + elsif t.tok.include?("\n") + past_first_newline = true + false + else + false end end - if first_token && first_token.state != Ripper::EXPR_DOT - tokens_without_last_line = tokens[0..index] - code_without_last_line = tokens_without_last_line.map(&:tok).join - opens_without_last_line = IRB::NestingParser.open_tokens(tokens_without_last_line) - if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line) - return last_line_tokens.map(&:tok).join + if index + first_token = nil + last_line_tokens = tokens[(index + 1)..(tokens.size - 1)] + last_line_tokens.each do |t| + unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event) + first_token = t + break + end + end + + if first_token && first_token.state != Ripper::EXPR_DOT + tokens_without_last_line = tokens[0..index] + code_without_last_line = tokens_without_last_line.map(&:tok).join + opens_without_last_line = NestingParser.open_tokens(tokens_without_last_line) + if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line) + return last_line_tokens.map(&:tok).join + end end end + false end - false end + # :startdoc: end -# :startdoc: + +RubyLex = IRB::RubyLex +Object.deprecate_constant(:RubyLex) diff --git a/test/irb/test_nesting_parser.rb b/test/irb/test_nesting_parser.rb index 83c7fb08a..ea3a23aaf 100644 --- a/test/irb/test_nesting_parser.rb +++ 
b/test/irb/test_nesting_parser.rb @@ -14,7 +14,7 @@ def teardown end def parse_by_line(code) - IRB::NestingParser.parse_by_line(RubyLex.ripper_lex_without_warning(code)) + IRB::NestingParser.parse_by_line(IRB::RubyLex.ripper_lex_without_warning(code)) end def test_open_tokens @@ -27,7 +27,7 @@ def f x: " #{p(1, 2, 3 EOS - opens = IRB::NestingParser.open_tokens(RubyLex.ripper_lex_without_warning(code)) + opens = IRB::NestingParser.open_tokens(IRB::RubyLex.ripper_lex_without_warning(code)) assert_equal(%w[class def if do { " #{ (], opens.map(&:tok)) end diff --git a/test/irb/test_ruby_lex.rb b/test/irb/test_ruby_lex.rb index 338ff3875..81595cca4 100644 --- a/test/irb/test_ruby_lex.rb +++ b/test/irb/test_ruby_lex.rb @@ -20,7 +20,7 @@ def test_interpolate_token_with_heredoc_and_unclosed_embexpr #{⑤&<