This repository has been archived by the owner on Nov 2, 2019. It is now read-only.
/
cfg_reduce.rb
700 lines (564 loc) · 23.6 KB
/
cfg_reduce.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
module Furnace::AVM2
module Transform
class CFGReduce
def initialize(options={})
@verbose = options[:verbose] || false
#@verbose = true
end
def transform(cfg)
@cfg = cfg
@dom = @cfg.dominators
@loops = @cfg.identify_loops
@visited = Set.new
@loop_tails = {}
@loop_nonlocal = Set.new
@postcond_heads = Set.new
@postcond_tails = Set.new
@try_tails = Hash.new { |h,k| h[k] = Set.new }
ast, = extended_block(@cfg.entry)
@visited.add @cfg.exit
if @visited != @cfg.nodes
raise "failsafe: not all blocks visited (#{(@cfg.nodes - @visited).map(&:label).join(", ")} left)"
end
ast
end
def possibly_wrap_eh(block, nodes, exception, loop_stack, nesting)
if nodes.any?
if exception.nil?
nodes
else
log nesting, "exception dispatcher"
unless exception.cti.type == :exception_dispatch
raise "invalid exception cti"
end
@visited.add exception
catches = exception.cti.children
handlers = []
root, *tails = find_merge_point([ block ] + exception.targets)
exception.targets.zip(tails).each_with_index do |(target, tail), index|
log nesting, "handler #{catches[index].inspect}"
handler = extended_block(target, tail || root, loop_stack, nesting + 1, exception.exception)
node = catches[index]
if node.type == :catch
exc_name, var_name = node.children
handlers.push AST::Node.new(:catch, [
exc_name, var_name,
handler
], node.metadata)
elsif node.type == :finally
handlers.push AST::Node.new(:finally, [
handler
], node.metadata)
else
raise "unknown handler type #{node.type}"
end
end
eh_nodes = [ AST::Node.new(:try, [
AST::Node.new(:begin, nodes),
] + handlers) ]
if tails.any? && tails.uniq.count == 1 &&
@dom[tails.first].include?(exception)
# Handle a special case whether control doesn't flow after tails
# of the catches by its own, e.g. the last statement of try
# block is return.
tail_block = tails.first
elsif @try_tails.has_key? exception
# Handle a special case where control falls through more than
# one level of scopes.
if @try_tails[exception].count > 1
raise "multiple try block exit points"
end
tail_block = @try_tails[exception].first
end
if tail_block
tail_code = extended_block(tail_block, nil, loop_stack, nesting + 1, nil)
eh_nodes.concat tail_code.children
end
eh_nodes
end
else
[]
end
end
def is_loop_head?(block, loop_stack)
(@loops.include?(block) || @postcond_tails.include?(block)) &&
loop_stack.include?(block)
end
def is_loop_tail?(block, loop_stack)
@loop_tails.include?(block) &&
loop_stack.include?(@loop_tails[block])
end
def extended_block(block, stopgap=nil, loop_stack=[], nesting=0, upper_exc=nil, options={})
nodes = []
prev_block = nil
current_exception = upper_exc
current_nodes = []
exception_changed = false
log nesting, "--- STOPGAP: #{stopgap.label.inspect}" if stopgap
while block
log nesting, "BLOCK: #{block.label.inspect}"
if is_loop_head?(block, loop_stack)
if options[:infinite_loop_head]
# Infinite loop head is a special case where cti_block
# has back edges pointing to it, but just for once it
# should not be turned to (continue) statement.
options.delete(:infinite_loop_head)
else
log nesting, "exit: loop head (continue stmt)"
check_nonlocal_loop(loop_stack, block) do |params|
current_nodes << AST::Node.new(:continue, params)
end
break
end
elsif is_loop_tail?(block, loop_stack)
log nesting, "exit: loop tail (break stmt)"
loop = @loop_tails[block]
check_nonlocal_loop(loop_stack, loop) do |params|
current_nodes << AST::Node.new(:break, params)
end
break
elsif loop_stack.first == block && !@loops.include?(block)
log nesting, "exit: do..while cti block"
break
elsif block == stopgap
log nesting, "exit: stopgap encountered"
break
elsif block.cti && block.cti.type == :exception_dispatch
log nesting, "exit: spurious exception dispatch traverse"
break
elsif !upper_exc.nil? && block.exception.nil? && block != @cfg.exit
log nesting, "exit: leaving try block"
@try_tails[upper_exc].add block
break
elsif block == @cfg.exit
# We have just arrived to exit node.
break
end
log nesting, "EX: #{(current_exception.label if current_exception) || '-'} " <<
"NEW-EX: #{(block.exception.label if block.exception) || '-'}"
if block.exception != current_exception
# Don't wrap with a handler if we're coming within
# a nested scope.
unless block.exception && current_exception &&
block.exception.exception == current_exception
nodes.concat possibly_wrap_eh(prev_block, current_nodes, current_exception, loop_stack, nesting)
end
current_exception = block.exception
current_nodes = []
exception_changed = true
end
if @visited.include? block
raise "failsafe: block #{block.label} already visited"
end
prev_block = block
@visited.add block
if block.cti
if block.cti.type == :lookup_switch
log nesting, "is a switch"
append_instructions(block, current_nodes)
# Group cases pointing to the same blocks of code.
aliases = Hash[block.targets.each_index.
group_by { |index| block.targets[index] }.values.
map { |(main, *others)| [ main, others ] }]
# Find a merge point for all of the case branches.
case_branches = block.targets.values_at(*aliases.keys)
case_merges = find_merge_point(case_branches)
# A possible exit point for the statement is a merge which
# isn't pointed to by a branch. This prediction can fail if
# there are empty cases.
possible_exit_points = (case_merges.compact - case_branches).uniq
if possible_exit_points.count > 1
raise "multiple possible switch exit points at first guess"
end
exit_point = possible_exit_points.first
log nesting, "exit point (first guess): #{exit_point.inspect}"
# Compute case predecessors for fallthrough.
case_predecessors = Hash.new { |h,k| h[k] = Set.new }
case_branches.zip(case_merges).each do |(branch, merge)|
if case_branches.include?(merge)
case_predecessors[merge].add branch
end
end
# One and only one block may have multiple predecessors.
# In this case, it is the actual exit point; switch the
# prediction.
new_exit_point, = case_predecessors.find { |branch, pred| pred.count > 1 }
if new_exit_point
if case_predecessors.find { |branch, pred|
pred.count > 1 && branch != new_exit_point }
raise "multiple possible switch exit points at second guess"
end
exit_point = new_exit_point
log nesting, "exit point (second guess): #{exit_point.inspect}"
end
if exit_point.nil? || @dom[exit_point].include?(stopgap)
exit_point = stopgap
log nesting, "exit point (third guess): stopgap #{stopgap.inspect}"
end
# Flatten the one-element sets.
case_predecessors.each do |branch, pred|
case_predecessors[branch] = pred.first
end
case_successors = case_predecessors.invert
# Generate code for the actual branches. Stopgap is either the
# another case (fallthrough) or exit point.
case_bodies = case_branches.zip(case_merges).map do |(branch, merge)|
if case_branches.include? merge
branch_stopgap = merge
else
branch_stopgap = exit_point
end
extended_block(branch, branch_stopgap, loop_stack, nesting + 1, current_exception)
end
node = AST::Node.new(:begin)
# Sort the nodes in the order of fallthrough precedence
# and assemble the body AST.
case_pool = case_branches.dup
while case_pool.any?
next_branch = case_pool.find { |c| !case_predecessors.has_key?(c) }
if next_branch.nil?
raise "circular dependency between cases"
end
body = nil
while next_branch
case_pool.delete next_branch
if body && !case_predecessors.has_key?(next_branch)
body.children << AST::Node.new(:break)
end
main_index = block.targets.index(next_branch)
body = case_bodies[case_branches.index(next_branch)]
[ main_index, *aliases[main_index] ].each do |index|
if index == 0
node.children << AST::Node.new(:default)
else
node.children << AST::Node.new(:case, [
AST::Node.new(:integer, [ index - 1 ])
])
end
end
node.children << body
next_branch = case_successors[next_branch]
end
body.children << AST::Node.new(:break)
end
current_nodes << AST::Node.new(:switch, [
block.cti.children.last,
node
])
if reachable?(exit_point, [ block ])
block = exit_point
else
block = nil
end
elsif @loops.include?(block) && !@postcond_heads.include?(block)
# we're trapped in a strange loop
if block.insns.first == block.cti &&
!(@loops[block].include?(block.targets.first) &&
@loops[block].include?(block.targets.last))
# Make sure that both branch targets don't reside within the
# loop. If they do, it's a do-while loop.
log nesting, "is a while loop"
loop_type = :head_cti
cti_block = block
else
back_edges = []
@loops[block].each do |loop_block|
loop_block.targets.each do |target|
# Find a back edge
if @dom[loop_block].include? target
back_edges << loop_block
end
end
end
if back_edges.count == 1 &&
back_edges.first.cti
log nesting, "is a do-while loop"
loop_type = :tail_cti
cti_block = back_edges.first
else
log nesting, "is an infinite loop"
loop_type = :infinite
cti_block = block
end
end
if loop_type == :infinite
in_root, out_root = block, nil
# out_root = nil is not correct in all cases, i.e.
# when multiple breaks are present.
expr = AST::Node.new(:true)
else
reverse = !cti_block.cti.children[0]
in_root, out_root = cti_block.targets
if !@loops[block].include?(in_root)
# One of the branch targets should reside within
# the loop.
in_root, out_root = out_root, in_root
reverse = !reverse
end
# If we reversed the roots or it was a (jump-if false),
# then reverse the condition.
expr = normalize_cti_expr(cti_block, reverse)
end
# Mark the loop tail so we could detect `break' and
# `continue' statements.
@loop_tails[out_root] = cti_block
# Remove the block from visited set if it is unrelated to the
# current loop condition, as it should be re-processed.
if loop_type != :head_cti
@visited.delete block
@postcond_heads.add block
@postcond_tails.add cti_block
end
# Handle a special case: all code in the loop header.
if loop_type == :tail_cti && cti_block == block
body = AST::Node.new(:begin)
append_instructions(block, body.children)
else
body = extended_block(in_root, nil, [ cti_block ] + loop_stack, nesting + 1, current_exception,
{ infinite_loop_head: (loop_type == :infinite) })
end
# [(label name)]
# We first parse the body and then add the label before
# the loop body if anything in the body requires that label
# to be present.
if @loop_nonlocal.include?(block)
current_nodes << AST::Node.new(:label, [ loop_label(block) ])
end
# Map loop types to node types.
if loop_type == :head_cti || loop_type == :infinite
loop_node = :while
else
loop_node = :do_while
end
# (while|do-while (condition)
# (body ...))
current_nodes << AST::Node.new(loop_node, [
expr,
body
])
# Add cti_block to visited for the do-while case.
if loop_type == :tail_cti
@visited.add cti_block
end
block = out_root
elsif block.cti.type == :throw
log nesting, "ends with throw"
append_instructions(block, current_nodes, true)
block = nil
else
log nesting, "is a conditional"
append_instructions(block, current_nodes)
# This is an `if'.
reverse = !block.cti.children[0]
left_root, right_root = block.targets
# (if (condition)
# (if-true ...)
# [(if-false ...)])
# Note that you cannot reach expression if-true nor
# expression if-false without evaluating condition.
# Thus, to go inside the if body, a root has to be
# completely dominated by this block--that is, does
# not have edges coming to it even from other blocks
# dominated by this block.
# A special case: empty if().
if left_root == right_root
current_nodes << AST::Node.new(:if, [
block.cti.children[1],
AST::Node.new(:begin)
])
block = left_root
next
end
# If the left root isn't dominated by block,
# then it can't be `if' branch.
if !completely_dominated?(left_root, block)
left_root, right_root = right_root, left_root
reverse = !reverse
end
# If the left root still isn't dominated by block,
# then this is not a proper conditional.
# If the left root leads to a loop head or tail, then
# the code will not be generated for that root, and
# its dominance is irrelevant.
if !completely_dominated?(left_root, block) &&
!(is_loop_head?(left_root, loop_stack) ||
is_loop_tail?(left_root, loop_stack))
raise "not well-formed if"
end
# If the right root is dominated by this block, which
# means that we have an `else' part, and if the condition
# is reversed, turn that back. This serves purely aesthetical
# purposes and depends on behavior of ASC code generator.
if completely_dominated?(right_root, block) && reverse
left_root, right_root = right_root, left_root
reverse = false
end
# If we reversed the roots or it was a (jump-if false),
# then reverse the condition.
expr = normalize_cti_expr(block, reverse)
# Does this conditional have an `else' block?
if completely_dominated?(right_root, block)
# Yes. Find merge point.
merge_left, merge_right = find_merge_point([ left_root, right_root ])
# One or both of the merge points could be nil, but they will
# not be different.
merge = merge_left || merge_right
# If the merge search did not yield a valid node, use
# stopgap for the current block to avoid runaway code
# synthesis.
#
# The stopgap block is actually an innermost block from
# a stopgap block set implicitly represented by a set of
# objects contained in arguments of recursive calls. As
# the `if's are fully nested when well-formed, we can only
# check for collision with innermost stopgap block.
log nesting, "left"
left_code = extended_block(left_root, merge || stopgap, loop_stack, nesting + 1, current_exception)
log nesting, "right"
right_code = extended_block(right_root, merge || stopgap, loop_stack, nesting + 1, current_exception)
current_nodes << AST::Node.new(:if, [ expr, left_code, right_code ])
block = merge
else
# No. The "right root" is actually post-if code.
log nesting, "one-way"
code = extended_block(left_root, right_root, loop_stack, nesting + 1, current_exception)
current_nodes << AST::Node.new(:if, [ expr, code ])
block = right_root
end
end
elsif block.targets.count == 1
append_instructions(block, current_nodes)
block = block.targets.first
else
raise "invalid target count (#{block.targets.count})"
end
end
if exception_changed || nesting == 0
nodes.concat possibly_wrap_eh(prev_block, current_nodes, current_exception, loop_stack, nesting)
else
nodes = current_nodes
end
AST::Node.new(:begin, nodes)
end
# Block B is completely dominated by another block D if
# it is dominated by D and no edges ever lead to block B
# from any other block, including those dominated by D,
# but excluding any back edges.
def completely_dominated?(block, dominator)
if @loops.include?(block)
(block.sources - @loops[block].to_a) == [dominator]
else
block.sources == [dominator]
end
end
# Check if a block is reachable from sources.
def reachable?(block, sources)
worklist = sources.to_set
visited = Set[]
while worklist.any?
node = worklist.first
worklist.delete node
return true if node == block
visited.add node
node.targets.each do |target|
# Skip visited nodes.
if visited.include?(target)
next
end
# Skip back edges.
if @dom[node].include?(target)
next
end
worklist.add target
end
end
false
end
# Find a set of merge points for a set of partially diverged
# paths beginning from `heads'.
# E.g. here:
#
# ----<---
# / \
# A -------- B --
# |\ \
# | C ---- E - F--- <exit>
# | /
# \- D -
#
# with A as the root and B, C and E as heads, the function reports
# following merge points: E for C and D, and nil for B.
# Note that back edge (denoted by < in the picture) is ignored.
def find_merge_point(heads)
seen = Set[]
heads.map do |head|
# Trail is an ordered collection of nodes encountered during
# BFS. Order of nodes with same rank is irrelevant.
trail = []
worklist = Set[head]
visited = Set[head]
while worklist.any?
node = worklist.first
worklist.delete node
visited.add node
trail.push node
# Nodes which are dominated by the current head aren't relevant
# for merge point search and will cause false positives.
unless @dom[node].include? head
seen.add node
end
node.targets.each do |target|
# Skip visited nodes.
if visited.include?(target)
next
end
# Skip back edges.
if @loops[target] && @loops[target].include?(node)
next
end
worklist.add target
end
end
trail
end.map do |(head, *trail)|
trail.find do |trail_elem|
seen.include?(trail_elem)
end
end.map do |tail|
tail unless tail == @cfg.exit
end
end
# Check if the control transfer is nonlocal according to the
# innermost loop and adjust @loop_nonlocal accordingly for labels
# to be inserted where appropriate.
def check_nonlocal_loop(loop_stack, block)
if loop_stack.first != block
@loop_nonlocal.add block
yield [loop_label(block)]
else
yield []
end
end
def append_instructions(block, nodes, cti=false)
block.insns.each do |insn|
next if insn.equal?(block.cti) && !cti
nodes << insn
end
end
def loop_label(block)
"label#{block.label}"
end
def normalize_cti_expr(block, negate)
if negate
AST::Node.new(:!, [ block.cti.children[1] ])
else
block.cti.children[1]
end
end
private
def log(nesting, what)
$stderr.puts "CFGr: #{" " * nesting}#{what}" if @verbose
end
end
end
end