Merge pull request #367 from yui-knk/after_shift

Provide functionalities for Bring Your Own Stack
ruby · Feb 14, 2024 · 08a77ac · 08a77ac
2 parents 30e1f29 + 6b2c853
commit 08a77ac
Show file tree

Hide file tree

Showing 16 changed files with 957 additions and 457 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,39 @@
 # NEWS for Lrama
 
+## Lrama 0.6.3 (2024-02-xx)
+
+### Bring Your Own Stack
+
+Provide functionalities for Bring Your Own Stack.
+
+Ruby’s Ripper library requires its own semantic value stack to manage the Ruby objects returned by user-defined callback methods. Currently Ripper uses the semantic value stack (`yyvsa`) that the parser uses to manage Nodes. This hack imposes some limitations on Ripper. For example, Ripper cannot perform semantic analysis that depends on the Node structure.
+
+Lrama introduces two features that let parser generator users maintain a separate semantic value stack.
+
+1. Callback entry points
+
+Users can emulate a semantic value stack with these callbacks.
+Lrama provides the following five callbacks. Each registered function is called when its event happens. For example, the `%after-shift` function is called when a shift happens on the original semantic value stack.
+
+* `%after-shift` function_name
+* `%before-reduce` function_name
+* `%after-reduce` function_name
+* `%after-shift-error-token` function_name
+* `%after-pop-stack` function_name
+
+2. `$:n` variable to access the index of each grammar symbol
+
+Users also need to access the semantic values on their own stack in grammar actions. `$:n` provides a way to access them. `$:n` is translated to a negative index from the top of the stack.
+For example:
+
+```
+primary: k_if expr_value then compstmt if_tail k_end
+          {
+          /*% ripper: if!($:2, $:4, $:5) %*/
+          /* $:2 = -5, $:4 = -3, $:5 = -2. */
+          }
+```
+
 ## Lrama 0.6.2 (2024-01-27)
 
 ### %no-stdlib directive

diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb
@@ -26,6 +26,7 @@ class Grammar
     attr_accessor :union, :expect,
                   :printers, :error_tokens,
                   :lex_param, :parse_param, :initial_action,
+                  :after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
                   :symbols_resolver, :types,
                   :rules, :rule_builders,
                   :sym_to_rules, :no_stdlib

diff --git a/lib/lrama/grammar/code/initial_action_code.rb b/lib/lrama/grammar/code/initial_action_code.rb
@@ -6,18 +6,24 @@ class InitialActionCode < Code
 
         # * ($$) yylval
         # * (@$) yylloc
+        # * ($:$) error
         # * ($1) error
         # * (@1) error
+        # * ($:1) error
         def reference_to_c(ref)
           case
           when ref.type == :dollar && ref.name == "$" # $$
             "yylval"
           when ref.type == :at && ref.name == "$" # @$
             "yylloc"
+          when ref.type == :index && ref.name == "$" # $:$
+            raise "$:#{ref.value} can not be used in initial_action."
           when ref.type == :dollar # $n
             raise "$#{ref.value} can not be used in initial_action."
           when ref.type == :at # @n
             raise "@#{ref.value} can not be used in initial_action."
+          when ref.type == :index # $:n
+            raise "$:#{ref.value} can not be used in initial_action."
           else
             raise "Unexpected. #{self}, #{ref}"
           end

diff --git a/lib/lrama/grammar/code/no_reference_code.rb b/lib/lrama/grammar/code/no_reference_code.rb
@@ -6,14 +6,18 @@ class NoReferenceCode < Code
 
         # * ($$) error
         # * (@$) error
+        # * ($:$) error
         # * ($1) error
         # * (@1) error
+        # * ($:1) error
         def reference_to_c(ref)
           case
           when ref.type == :dollar # $$, $n
             raise "$#{ref.value} can not be used in #{type}."
           when ref.type == :at # @$, @n
             raise "@#{ref.value} can not be used in #{type}."
+          when ref.type == :index # $:$, $:n
+            raise "$:#{ref.value} can not be used in #{type}."
           else
             raise "Unexpected. #{self}, #{ref}"
           end

diff --git a/lib/lrama/grammar/code/printer_code.rb b/lib/lrama/grammar/code/printer_code.rb
@@ -11,19 +11,25 @@ def initialize(type:, token_code:, tag:)
 
         # * ($$) *yyvaluep
         # * (@$) *yylocationp
+        # * ($:$) error
         # * ($1) error
         # * (@1) error
+        # * ($:1) error
         def reference_to_c(ref)
           case
           when ref.type == :dollar && ref.name == "$" # $$
             member = @tag.member
             "((*yyvaluep).#{member})"
           when ref.type == :at && ref.name == "$" # @$
             "(*yylocationp)"
+          when ref.type == :index && ref.name == "$" # $:$
+            raise "$:#{ref.value} can not be used in #{type}."
           when ref.type == :dollar # $n
             raise "$#{ref.value} can not be used in #{type}."
           when ref.type == :at # @n
             raise "@#{ref.value} can not be used in #{type}."
+          when ref.type == :index # $:n
+            raise "$:#{ref.value} can not be used in #{type}."
           else
             raise "Unexpected. #{self}, #{ref}"
           end

diff --git a/lib/lrama/grammar/code/rule_action.rb b/lib/lrama/grammar/code/rule_action.rb
@@ -11,8 +11,10 @@ def initialize(type:, token_code:, rule:)
 
         # * ($$) yyval
         # * (@$) yyloc
+        # * ($:$) error
         # * ($1) yyvsp[i]
         # * (@1) yylsp[i]
+        # * ($:1) i - 1
         #
         #
         # Consider a rule like
@@ -24,13 +26,16 @@ def initialize(type:, token_code:, rule:)
         # "Rule"                class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end }
         # "Position in grammar"                   $1     $2      $3          $4          $5
         # "Index for yyvsp"                       -4     -3      -2          -1           0
+        # "$:n"                                  $:1    $:2     $:3         $:4         $:5
+        # "index of $:n"                          -5     -4      -3          -2          -1
         #
         #
         # For the first midrule action:
         #
         # "Rule"                class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end }
         # "Position in grammar"                   $1
         # "Index for yyvsp"                        0
+        # "$:n"                                  $:1
         def reference_to_c(ref)
           case
           when ref.type == :dollar && ref.name == "$" # $$
@@ -39,6 +44,8 @@ def reference_to_c(ref)
             "(yyval.#{tag.member})"
           when ref.type == :at && ref.name == "$" # @$
             "(yyloc)"
+          when ref.type == :index && ref.name == "$" # $:$
+            raise "$:$ is not supported"
           when ref.type == :dollar # $n
             i = -position_in_rhs + ref.index
             tag = ref.ex_tag || rhs[ref.index - 1].tag
@@ -47,6 +54,9 @@ def reference_to_c(ref)
           when ref.type == :at # @n
             i = -position_in_rhs + ref.index
             "(yylsp[#{i}])"
+          when ref.type == :index # $:n
+            i = -position_in_rhs + ref.index
+            "(#{i} - 1)"
           else
             raise "Unexpected. #{self}, #{ref}"
           end

diff --git a/lib/lrama/lexer.rb b/lib/lrama/lexer.rb
@@ -27,6 +27,11 @@ class Lexer
       %precedence
       %prec
       %error-token
+      %before-reduce
+      %after-reduce
+      %after-shift-error-token
+      %after-shift
+      %after-pop-stack
       %empty
       %code
       %rule

diff --git a/lib/lrama/lexer/token/user_code.rb b/lib/lrama/lexer/token/user_code.rb
@@ -56,6 +56,17 @@ def scan_reference(scanner)
             return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
           when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right]  (named reference with brackets)
             return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
+
+          # $: references
+          when scanner.scan(/\$:\$/) # $:$
+            return Lrama::Grammar::Reference.new(type: :index, name: "$", first_column: start, last_column: scanner.pos)
+          when scanner.scan(/\$:(\d+)/) # $:1
+            return Lrama::Grammar::Reference.new(type: :index, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
+          when scanner.scan(/\$:([a-zA-Z_][a-zA-Z0-9_]*)/) # $:foo, $:expr (named reference without brackets)
+            return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
+          when scanner.scan(/\$:\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $:[expr.right], $:[expr-right] (named reference with brackets)
+            return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
+
           end
         end
       end

diff --git a/lib/lrama/output.rb b/lib/lrama/output.rb
@@ -161,6 +161,61 @@ def user_initial_action(comment = "")
       STR
     end
 
     # Builds the output snippet that calls the function registered with
     # `%after-shift`.
     #
     # Returns "" when the grammar has no after_shift entry. Otherwise returns
     # a string made of: the given +comment+, a `#line` directive naming the
     # directive's line and the grammar file path, a braced call to the
     # registered function (its name is `s_value`) with `parse_param_name` as
     # the argument list, and a closing `#line [@oline@] [@ofile@]` line.
     #
     # The heredoc is `<<-`, so its content is emitted verbatim: the `#line`
     # lines start at column 0 in the result.
     # NOTE(review): `[@oline@]` / `[@ofile@]` look like placeholders that are
     # substituted elsewhere — confirm against the caller.
+    def after_shift_function(comment = "")
+      return "" unless @grammar.after_shift
+
+      <<-STR
+        #{comment}
+#line #{@grammar.after_shift.line} "#{@grammar_file_path}"
+        {#{@grammar.after_shift.s_value}(#{parse_param_name});}
+#line [@oline@] [@ofile@]
+      STR
+    end
+
     # Builds the output snippet that calls the function registered with
     # `%before-reduce`.
     #
     # Returns "" when the grammar has no before_reduce entry. Otherwise
     # returns a string made of: the given +comment+, a `#line` directive
     # naming the directive's line and the grammar file path, a braced call to
     # the registered function (its name is `s_value`), and a closing
     # `#line [@oline@] [@ofile@]` line.
     #
     # The call passes `yylen` first, immediately followed by `user_args`.
     # NOTE(review): `user_args` presumably renders the extra arguments with
     # their own leading separator, since nothing is inserted after `yylen`
     # here — confirm against its definition.
+    def before_reduce_function(comment = "")
+      return "" unless @grammar.before_reduce
+
+      <<-STR
+        #{comment}
+#line #{@grammar.before_reduce.line} "#{@grammar_file_path}"
+        {#{@grammar.before_reduce.s_value}(yylen#{user_args});}
+#line [@oline@] [@ofile@]
+      STR
+    end
+
     # Builds the output snippet that calls the function registered with
     # `%after-reduce`.
     #
     # Returns "" when the grammar has no after_reduce entry. Otherwise returns
     # a string made of: the given +comment+, a `#line` directive naming the
     # directive's line and the grammar file path, a braced call to the
     # registered function (its name is `s_value`), and a closing
     # `#line [@oline@] [@ofile@]` line.
     #
     # The call passes `yylen` first, immediately followed by `user_args`.
     # NOTE(review): `user_args` presumably renders the extra arguments with
     # their own leading separator, since nothing is inserted after `yylen`
     # here — confirm against its definition.
+    def after_reduce_function(comment = "")
+      return "" unless @grammar.after_reduce
+
+      <<-STR
+        #{comment}
+#line #{@grammar.after_reduce.line} "#{@grammar_file_path}"
+        {#{@grammar.after_reduce.s_value}(yylen#{user_args});}
+#line [@oline@] [@ofile@]
+      STR
+    end
+
     # Builds the output snippet that calls the function registered with
     # `%after-shift-error-token`.
     #
     # Returns "" when the grammar has no after_shift_error_token entry.
     # Otherwise returns a string made of: the given +comment+, a `#line`
     # directive naming the directive's line and the grammar file path, a
     # braced call to the registered function (its name is `s_value`) with
     # `parse_param_name` as the argument list, and a closing
     # `#line [@oline@] [@ofile@]` line.
     #
     # The heredoc is `<<-`, so its content is emitted verbatim: the `#line`
     # lines start at column 0 in the result.
+    def after_shift_error_token_function(comment = "")
+      return "" unless @grammar.after_shift_error_token
+
+      <<-STR
+        #{comment}
+#line #{@grammar.after_shift_error_token.line} "#{@grammar_file_path}"
+        {#{@grammar.after_shift_error_token.s_value}(#{parse_param_name});}
+#line [@oline@] [@ofile@]
+      STR
+    end
+
     # Builds the output snippet that calls the function registered with
     # `%after-pop-stack`.
     #
     # +len+ is interpolated as-is as the first argument of the generated
     # call, immediately followed by `user_args`; +comment+ is placed on the
     # first line of the snippet.
     #
     # Returns "" when the grammar has no after_pop_stack entry. Otherwise
     # returns a string made of: the comment, a `#line` directive naming the
     # directive's line and the grammar file path, the braced call to the
     # registered function (its name is `s_value`), and a closing
     # `#line [@oline@] [@ofile@]` line.
     # NOTE(review): `len` is presumably a C expression for the number of
     # popped entries — confirm against the template that calls this.
+    def after_pop_stack_function(len, comment = "")
+      return "" unless @grammar.after_pop_stack
+
+      <<-STR
+        #{comment}
+#line #{@grammar.after_pop_stack.line} "#{@grammar_file_path}"
+        {#{@grammar.after_pop_stack.s_value}(#{len}#{user_args});}
+#line [@oline@] [@ofile@]
+      STR
+    end
+
     def symbol_actions_for_error_token
       str = ""