diff --git a/README.YARD.md b/README.YARD.md index bf8a31d..568cc5d 100644 --- a/README.YARD.md +++ b/README.YARD.md @@ -8,5 +8,5 @@ This is a design choice. It does create some pressure on garbage collector, but completely eliminates all concurrency and aliasing problems. -See also {AST::Node}, {AST::Processor} and {AST::Sexp} for additional -recommendations and design patterns. +See also {AST::Node}, {AST::Processor::Behavior} and {AST::Sexp} for +additional recommendations and design patterns. diff --git a/lib/ast.rb b/lib/ast.rb index 4ca859c..1e98c00 100644 --- a/lib/ast.rb +++ b/lib/ast.rb @@ -7,8 +7,8 @@ # garbage collector, but completely eliminates all concurrency # and aliasing problems. # -# See also {AST::Node}, {AST::Processor} and {AST::Sexp} for additional -# recommendations and design patterns. +# See also {AST::Node}, {AST::Processor::Behavior} and {AST::Sexp} for +# additional recommendations and design patterns. # module AST require 'ast/node' diff --git a/lib/ast/processor.rb b/lib/ast/processor.rb index c5b5452..62e5b1a 100644 --- a/lib/ast/processor.rb +++ b/lib/ast/processor.rb @@ -1,266 +1,12 @@ module AST - # Processor is a class which helps transforming one AST into another. - # In a nutshell, the {#process} method accepts a {Node} and dispatches - # it to a handler corresponding to its type, and returns a (possibly) - # updated variant of the node. + # This class includes {AST::Processor::Behavior}; however, it is + # deprecated, since the module defines all of the behaviors that + # the processor includes. Any new libraries should use + # {AST::Processor::Behavior} instead of subclassing this. # - # Processor has a set of associated design patterns. They are best - # explained with a concrete example. Let's define a simple arithmetic - # language and an AST format for it: - # - # Terminals (AST nodes which do not have other AST nodes inside): - # - # * `(integer )`, - # - # Nonterminals (AST nodes with other nodes as children): - # - # * `(add )`, - # * `(multiply )`, - # * `(divide )`, - # * `(negate )`, - # * `(store )`: stores value of `` into a variable named ``, - # * `(load )`: loads value of a variable named ``, - # * `(each ...): computes each of the ``s and prints the result. - # - # All AST nodes have the same Ruby class, and therefore they don't - # know how to traverse themselves. (A solution which dynamically checks the - # type of children is possible, but is slow and error-prone.) So, a subclass - # of Processor which knows how to traverse the entire tree should be defined. - # Such subclass has a handler for each nonterminal node which recursively - # processes children nodes: - # - # require 'ast' - # - # class ArithmeticsProcessor < AST::Processor - # # This method traverses any binary operators such as (add) or (multiply). - # def process_binary_op(node) - # # Children aren't decomposed automatically; it is suggested to use Ruby - # # multiple assignment expansion, as it is very convenient here. - # left_expr, right_expr = *node - # - # # AST::Node#updated won't change node type if nil is passed as a first - # # argument, which allows to reuse the same handler for multiple node types - # # using `alias' (below). - # node.updated(nil, [ - # process(left_expr), - # process(right_expr) - # ]) - # end - # alias on_add process_binary_op - # alias on_multiply process_binary_op - # alias on_divide process_binary_op - # - # def on_negate(node) - # # It is also possible to use #process_all for more compact code - # # if every child is a Node. - # node.updated(nil, process_all(node)) - # end - # - # def on_store(node) - # expr, variable_name = *node - # - # # Note that variable_name is not a Node and thus isn't passed to #process. - # node.updated(nil, [ - # process(expr), - # variable_name - # ]) - # end - # - # # (load) is effectively a terminal node, and so it does not need - # # an explicit handler, as the following is the default behavior. - # def on_load(node) - # nil - # end - # - # def on_each(node) - # node.updated(nil, process_all(node)) - # end - # end - # - # Let's test our ArithmeticsProcessor: - # - # include AST::Sexp - # expr = s(:add, s(:integer, 2), s(:integer, 2)) - # - # p ArithmeticsProcessor.new.process(expr) == expr # => true - # - # As expected, it does not change anything at all. This isn't actually - # very useful, so let's now define a Calculator, which will compute the - # expression values: - # - # # This Processor folds nonterminal nodes and returns an (integer) - # # terminal node. - # class ArithmeticsCalculator < ArithmeticsProcessor - # def compute_op(node) - # # First, node children are processed and then unpacked to local - # # variables. - # nodes = process_all(node) - # - # if nodes.all? { |node| node.type == :integer } - # # If each of those nodes represents a literal, we can fold this - # # node! - # values = nodes.map { |node| node.children.first } - # AST::Node.new(:integer, [ - # yield(values) - # ]) - # else - # # Otherwise, we can just leave the current node in the tree and - # # only update it with processed children nodes, which can be - # # partially folded. - # node.updated(nil, nodes) - # end - # end - # - # def on_add(node) - # compute_op(node) { |left, right| left + right } - # end - # - # def on_multiply(node) - # compute_op(node) { |left, right| left * right } - # end - # end - # - # Let's check: - # - # p ArithmeticsCalculator.new.process(expr) # => (integer 4) - # - # Excellent, the calculator works! Now, a careful reader could notice that - # the ArithmeticsCalculator does not know how to divide numbers. What if we - # pass an expression with division to it? - # - # expr_with_division = \ - # s(:add, - # s(:integer, 1), - # s(:divide, - # s(:add, s(:integer, 8), s(:integer, 4)), - # s(:integer, 3))) # 1 + (8 + 4) / 3 - # - # folded_expr_with_division = ArithmeticsCalculator.new.process(expr_with_division) - # p folded_expr_with_division - # # => (add - # # (integer 1) - # # (divide - # # (integer 12) - # # (integer 3))) - # - # As you can see, the expression was folded _partially_: the inner `(add)` node which - # could be computed was folded to `(integer 12)`, the `(divide)` node is left as-is - # because there is no computing handler for it, and the root `(add)` node was also left - # as it is because some of its children were not literals. - # - # Note that this partial folding is only possible because the _data_ format, i.e. - # the format in which the computed values of the nodes are represented, is the same as - # the AST itself. - # - # Let's extend our ArithmeticsCalculator class further. - # - # class ArithmeticsCalculator - # def on_divide(node) - # compute_op(node) { |left, right| left / right } - # end - # - # def on_negate(node) - # # Note how #compute_op works regardless of the operator arity. - # compute_op(node) { |value| -value } - # end - # end - # - # Now, let's apply our renewed ArithmeticsCalculator to a partial result of previous - # evaluation: - # - # p ArithmeticsCalculator.new.process(expr_with_division) # => (integer 5) - # - # Five! Excellent. This is also pretty much how CRuby 1.8 executed its programs. - # - # Now, let's do some automated bug searching. Division by zero is an error, right? - # So if we could detect that someone has divided by zero before the program is even - # run, that could save some debugging time. - # - # class DivisionByZeroVerifier < ArithmeticsProcessor - # class VerificationFailure < Exception; end - # - # def on_divide(node) - # # You need to process the children to handle nested divisions - # # such as: - # # (divide - # # (integer 1) - # # (divide (integer 1) (integer 0)) - # left, right = process_all(node) - # - # if right.type == :integer && - # right.children.first == 0 - # raise VerificationFailure, "Ouch! This code divides by zero." - # end - # end - # - # def divides_by_zero?(ast) - # process(ast) - # false - # rescue VerificationFailure - # true - # end - # end - # - # nice_expr = \ - # s(:divide, - # s(:add, s(:integer, 10), s(:integer, 2)), - # s(:integer, 4)) - # - # p DivisionByZeroVerifier.new.divides_by_zero?(nice_expr) - # # => false. Good. - # - # bad_expr = \ - # s(:add, s(:integer, 10), - # s(:divide, s(:integer, 1), s(:integer, 0))) - # - # p DivisionByZeroVerifier.new.divides_by_zero?(bad_expr) - # # => true. WHOOPS. DO NOT RUN THIS. - # - # Of course, this won't detect more complex cases... unless you use some partial - # evaluation before! The possibilites are endless. Have fun. + # @deprecated Use {AST::Processor::Behavior} instead. class Processor - # Dispatches `node`. If a node has type `:foo`, then a handler named - # `on_foo` is invoked with one argument, the `node`; if there isn't - # such a handler, {#handler_missing} is invoked with the same argument. - # - # If the handler returns `nil`, `node` is returned; otherwise, the return - # value of the handler is passed along. - # - # @param [AST::Node, nil] node - # @return [AST::Node, nil] - def process(node) - return if node.nil? - - node = node.to_ast - - # Invoke a specific handler - on_handler = :"on_#{node.type}" - if respond_to? on_handler - new_node = send on_handler, node - else - new_node = handler_missing(node) - end - - node = new_node if new_node - - node - end - - # {#process}es each node from `nodes` and returns an array of results. - # - # @param [Array] nodes - # @return [Array] - def process_all(nodes) - nodes.to_a.map do |node| - process node - end - end - - # Default handler. Does nothing. - # - # @param [AST::Node] node - # @return [AST::Node, nil] - def handler_missing(node) - end + require 'ast/processor/behavior' + include Behavior end end diff --git a/lib/ast/processor/behavior.rb b/lib/ast/processor/behavior.rb new file mode 100644 index 0000000..d0ba54f --- /dev/null +++ b/lib/ast/processor/behavior.rb @@ -0,0 +1,288 @@ +module AST + class Processor + # The processor module is a module which helps transforming one + # AST into another. In a nutshell, the {#process} method accepts + # a {Node} and dispatches it to a handler corresponding to its + # type, and returns a (possibly) updated variant of the node. + # + # The processor module has a set of associated design patterns. + # They are best explained with a concrete example. Let's define a + # simple arithmetic language and an AST format for it: + # + # Terminals (AST nodes which do not have other AST nodes inside): + # + # * `(integer )`, + # + # Nonterminals (AST nodes with other nodes as children): + # + # * `(add )`, + # * `(multiply )`, + # * `(divide )`, + # * `(negate )`, + # * `(store )`: stores value of `` + # into a variable named ``, + # * `(load )`: loads value of a variable named + # ``, + # * `(each ...): computes each of the ``s and + # prints the result. + # + # All AST nodes have the same Ruby class, and therefore they don't + # know how to traverse themselves. (A solution which dynamically + # checks the type of children is possible, but is slow and + # error-prone.) So, a class including the module which knows how + # to traverse the entire tree should be defined. Such classes + # have a handler for each nonterminal node which recursively + # processes children nodes: + # + # require 'ast' + # + # class ArithmeticsProcessor + # include AST::Processor::Module + # # This method traverses any binary operators such as (add) + # # or (multiply). + # def process_binary_op(node) + # # Children aren't decomposed automatically; it is + # # suggested to use Ruby multiple assignment expansion, + # # as it is very convenient here. + # left_expr, right_expr = *node + # + # # AST::Node#updated won't change node type if nil is + # # passed as a first argument, which allows to reuse the + # # same handler for multiple node types using `alias' + # # (below). + # node.updated(nil, [ + # process(left_expr), + # process(right_expr) + # ]) + # end + # alias_method :on_add, :process_binary_op + # alias_method :on_multiply, :process_binary_op + # alias_method :on_divide, :process_binary_op + # + # def on_negate(node) + # # It is also possible to use #process_all for more + # # compact code if every child is a Node. + # node.updated(nil, process_all(node)) + # end + # + # def on_store(node) + # expr, variable_name = *node + # + # # Note that variable_name is not a Node and thus isn't + # # passed to #process. + # node.updated(nil, [ + # process(expr), + # variable_name + # ]) + # end + # + # # (load) is effectively a terminal node, and so it does + # # not need an explicit handler, as the following is the + # # default behavior. Essentially, for any nodes that don't + # # have a defined handler, the node remains unchanged. + # def on_load(node) + # nil + # end + # + # def on_each(node) + # node.updated(nil, process_all(node)) + # end + # end + # + # Let's test our ArithmeticsProcessor: + # + # include AST::Sexp + # expr = s(:add, s(:integer, 2), s(:integer, 2)) + # + # p ArithmeticsProcessor.new.process(expr) == expr # => true + # + # As expected, it does not change anything at all. This isn't + # actually very useful, so let's now define a Calculator, which + # will compute the expression values: + # + # # This Processor folds nonterminal nodes and returns an + # # (integer) terminal node. + # class ArithmeticsCalculator < ArithmeticsProcessor + # def compute_op(node) + # # First, node children are processed and then unpacked + # # to local variables. + # nodes = process_all(node) + # + # if nodes.all? { |node| node.type == :integer } + # # If each of those nodes represents a literal, we can + # # fold this node! + # values = nodes.map { |node| node.children.first } + # AST::Node.new(:integer, [ + # yield(values) + # ]) + # else + # # Otherwise, we can just leave the current node in the + # # tree and only update it with processed children + # # nodes, which can be partially folded. + # node.updated(nil, nodes) + # end + # end + # + # def on_add(node) + # compute_op(node) { |left, right| left + right } + # end + # + # def on_multiply(node) + # compute_op(node) { |left, right| left * right } + # end + # end + # + # Let's check: + # + # p ArithmeticsCalculator.new.process(expr) # => (integer 4) + # + # Excellent, the calculator works! Now, a careful reader could + # notice that the ArithmeticsCalculator does not know how to + # divide numbers. What if we pass an expression with division to + # it? + # + # expr_with_division = \ + # s(:add, + # s(:integer, 1), + # s(:divide, + # s(:add, s(:integer, 8), s(:integer, 4)), + # s(:integer, 3))) # 1 + (8 + 4) / 3 + # + # folded_expr_with_division = ArithmeticsCalculator.new.process(expr_with_division) + # p folded_expr_with_division + # # => (add + # # (integer 1) + # # (divide + # # (integer 12) + # # (integer 3))) + # + # As you can see, the expression was folded _partially_: the inner + # `(add)` node which could be computed was folded to + # `(integer 12)`, the `(divide)` node is left as-is because there + # is no computing handler for it, and the root `(add)` node was + # also left as it is because some of its children were not + # literals. + # + # Note that this partial folding is only possible because the + # _data_ format, i.e. the format in which the computed values of + # the nodes are represented, is the same as the AST itself. + # + # Let's extend our ArithmeticsCalculator class further. + # + # class ArithmeticsCalculator + # def on_divide(node) + # compute_op(node) { |left, right| left / right } + # end + # + # def on_negate(node) + # # Note how #compute_op works regardless of the operator + # # arity. + # compute_op(node) { |value| -value } + # end + # end + # + # Now, let's apply our renewed ArithmeticsCalculator to a partial + # result of previous evaluation: + # + # p ArithmeticsCalculator.new.process(expr_with_division) # => (integer 5) + # + # Five! Excellent. This is also pretty much how CRuby 1.8 executed + # its programs. + # + # Now, let's do some automated bug searching. Division by zero is + # an error, right? So if we could detect that someone has divided + # by zero before the program is even run, that could save some + # debugging time. + # + # class DivisionByZeroVerifier < ArithmeticsProcessor + # class VerificationFailure < Exception; end + # + # def on_divide(node) + # # You need to process the children to handle nested divisions + # # such as: + # # (divide + # # (integer 1) + # # (divide (integer 1) (integer 0)) + # left, right = process_all(node) + # + # if right.type == :integer && + # right.children.first == 0 + # raise VerificationFailure, "Ouch! This code divides by zero." + # end + # end + # + # def divides_by_zero?(ast) + # process(ast) + # false + # rescue VerificationFailure + # true + # end + # end + # + # nice_expr = \ + # s(:divide, + # s(:add, s(:integer, 10), s(:integer, 2)), + # s(:integer, 4)) + # + # p DivisionByZeroVerifier.new.divides_by_zero?(nice_expr) + # # => false. Good. + # + # bad_expr = \ + # s(:add, s(:integer, 10), + # s(:divide, s(:integer, 1), s(:integer, 0))) + # + # p DivisionByZeroVerifier.new.divides_by_zero?(bad_expr) + # # => true. WHOOPS. DO NOT RUN THIS. + # + # Of course, this won't detect more complex cases... unless you + # use some partial evaluation before! The possibilites are + # endless. Have fun. + module Behavior + # Dispatches `node`. If a node has type `:foo`, then a handler + # named `on_foo` is invoked with one argument, the `node`; if + # there isn't such a handler, {#handler_missing} is invoked + # with the same argument. + # + # If the handler returns `nil`, `node` is returned; otherwise, + # the return value of the handler is passed along. + # + # @param [AST::Node, nil] node + # @return [AST::Node, nil] + def process(node) + return if node.nil? + + node = node.to_ast + + # Invoke a specific handler + on_handler = :"on_#{node.type}" + if respond_to? on_handler + new_node = send on_handler, node + else + new_node = handler_missing(node) + end + + node = new_node if new_node + + node + end + + # {#process}es each node from `nodes` and returns an array of + # results. + # + # @param [Array] nodes + # @return [Array] + def process_all(nodes) + nodes.to_a.map do |node| + process node + end + end + + # Default handler. Does nothing. + # + # @param [AST::Node] node + # @return [AST::Node, nil] + def handler_missing(node) + end + end + end +end