diff --git a/ext/curl/tests/curl_basic_022.phpt b/ext/curl/tests/curl_basic_022.phpt
index e905dfd885d16..4a2177e06bd47 100644
--- a/ext/curl/tests/curl_basic_022.phpt
+++ b/ext/curl/tests/curl_basic_022.phpt
@@ -11,10 +11,10 @@ curl_setopt($ch, CURLOPT_COOKIELIST, 'Set-Cookie: C2=v2; expires=Thu, 31-Dec-203
 var_dump(curl_getinfo($ch, CURLINFO_COOKIELIST));
 ?>
---EXPECT--
+--EXPECTF--
 array(2) {
   [0]=>
-  string(38) ".php.net	TRUE	/	FALSE	2145916799	C1	v1"
+  string(38) ".php.net	TRUE	/	FALSE	%d	C1	v1"
   [1]=>
-  string(38) ".php.net	TRUE	/	FALSE	2145916799	C2	v2"
+  string(38) ".php.net	TRUE	/	FALSE	%d	C2	v2"
 }
diff --git a/ext/gd/gd.c b/ext/gd/gd.c
index b712861830bc9..d43ecab0d9c74 100644
--- a/ext/gd/gd.c
+++ b/ext/gd/gd.c
@@ -3956,6 +3956,11 @@ PHP_FUNCTION(imagescale)
 
 	im = php_gd_libgdimageptr_from_zval_p(IM);
 
+	if (tmp_h < 0 && tmp_w < 0) {
+		zend_value_error("Argument #2 ($width) and argument #3 ($height) cannot both be negative");
+		RETURN_THROWS();
+	}
+
 	if (tmp_h < 0 || tmp_w < 0) {
 		/* preserve ratio */
 		long src_x, src_y;
diff --git a/ext/gd/tests/gh17703.phpt b/ext/gd/tests/gh17703.phpt
new file mode 100644
index 0000000000000..4677b6a501391
--- /dev/null
+++ b/ext/gd/tests/gh17703.phpt
@@ -0,0 +1,17 @@
+--TEST--
+GH-17703 imagescale() with both width and height negative triggers a ValueError
+--EXTENSIONS--
+gd
+--FILE--
+<?php
+
+$im = imagecreatetruecolor(1, 1);
+
+try {
+    imagescale($im, -1, -1);
+} catch (\ValueError $e) {
+    echo $e->getMessage();
+}
+?>
+--EXPECT--
+Argument #2 ($width) and argument #3 ($height) cannot both be negative
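Note on the gd.c hunk above: the new guard has to come before the pre-existing "if (tmp_h < 0 || tmp_w < 0)" branch, because that branch treats a single negative dimension as "recompute it from the aspect ratio" and would otherwise also consume the both-negative case, for which no ratio can be derived. A minimal standalone sketch of that ordering (the helper name and types are illustrative, not php-src API):

    #include <stdbool.h>

    /* Illustrative guard order for imagescale()-style arguments: reject the
     * ambiguous both-negative case first; a single negative dimension is
     * legal and is later recomputed from the source aspect ratio. */
    static bool scale_dims_ok(long width, long height)
    {
        if (width < 0 && height < 0) {
            return false; /* nothing left to derive a ratio from */
        }
        return true;
    }
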
diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc
index b96d47461196c..772eea7a5d78a 100644
--- a/ext/opcache/jit/ir/ir_aarch64.dasc
+++ b/ext/opcache/jit/ir/ir_aarch64.dasc
@@ -1383,9 +1383,16 @@ static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src)
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
 	ir_reg base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-	int32_t offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[src].op3);
+	ir_insn *var_insn;
+	int32_t offset;
 
 	IR_ASSERT(ir_rule(ctx, src) == IR_STATIC_ALLOCA);
+	var_insn = &ctx->ir_base[src];
+	if (var_insn->op == IR_VADDR) {
+		var_insn = &ctx->ir_base[var_insn->op1];
+	}
+	IR_ASSERT(var_insn->op == IR_VAR || var_insn->op == IR_ALLOCA);
+	offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3);
 	if (aarch64_may_encode_imm12(offset)) {
 	|	add Rx(reg), Rx(base), #offset
 	} else {
@@ -5680,10 +5687,15 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 					ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available);
 					ir_ref *ops = insn->ops;
 					IR_REGSET_EXCL(available, reg);
-					if (constraints.tmp_regs[n].num > 0
-					 && IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) {
-						/* rematerialization */
-						reg |= IR_REG_SPILL_LOAD;
+					if (constraints.tmp_regs[n].num > 0) {
+						if (IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) {
+							/* rematerialization */
+							reg |= IR_REG_SPILL_LOAD;
+						} else if (ctx->ir_base[ops[constraints.tmp_regs[n].num]].op == IR_ALLOCA ||
+								ctx->ir_base[ops[constraints.tmp_regs[n].num]].op == IR_VADDR) {
+							/* local address rematerialization */
+							reg |= IR_REG_SPILL_LOAD;
+						}
 					}
 					ctx->regs[i][constraints.tmp_regs[n].num] = reg;
 				} else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) {
diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c
index 12103a174d07c..be8744ef198fd 100644
--- a/ext/opcache/jit/ir/ir_gcm.c
+++ b/ext/opcache/jit/ir/ir_gcm.c
@@ -1059,7 +1059,7 @@ int ir_schedule(ir_ctx *ctx)
 	if (ctx->flags & IR_DEBUG_SCHEDULE) {
 		fprintf(stderr, "After Schedule\n");
 		for (i = 1; i != 0; i = _next[i]) {
-			fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
+			fprintf(stderr, "%d -> %d (%d)\n", i, _blocks[i], _xlat[i]);
 		}
 	}
 #endif
@@ -1328,11 +1328,13 @@ int ir_schedule(ir_ctx *ctx)
 	new_ctx.cfg_edges = ctx->cfg_edges;
 	ctx->cfg_blocks = NULL;
 	ctx->cfg_edges = NULL;
 
+	ir_code_buffer *saved_code_buffer = ctx->code_buffer;
 	ir_free(ctx);
 	IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit);
 	IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit);
 	memcpy(ctx, &new_ctx, sizeof(ir_ctx));
+	ctx->code_buffer = saved_code_buffer;
 	ctx->flags2 |= IR_LINEAR;
 
 	ir_mem_free(_next);
diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h
index f980b86b89320..e28897e0dc407 100644
--- a/ext/opcache/jit/ir/ir_private.h
+++ b/ext/opcache/jit/ir/ir_private.h
@@ -1013,8 +1013,10 @@ IR_ALWAYS_INLINE uint32_t ir_insn_len(const ir_insn *insn)
 #define IR_HAS_FP_RET_SLOT     (1<<10)
 #define IR_16B_FRAME_ALIGNMENT (1<<11)
 
+/* Temporary: MEM2SSA -> SCCP */
+#define IR_MEM2SSA_VARS        (1<<25)
+
 /* Temporary: SCCP -> CFG */
-#define IR_SCCP_DONE           (1<<25)
 #define IR_CFG_REACHABLE       (1<<26)
 
 /* Temporary: Dominators -> Loops */
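Aside on the ir_gcm.c hunk above: ir_free() releases everything owned by the old context, and the following memcpy() then overwrites the context wholesale with the rebuilt one, so any caller-owned field has to be carried across by hand — that is all saved_code_buffer does. A reduced sketch of the same save/restore pattern (the struct and helper here are hypothetical, not the IR API):

    #include <string.h>

    typedef struct ctx {
        void *code_buffer; /* owned by the caller, not by the IR arena */
        /* ... every other field is rebuilt from scratch ... */
    } ctx;

    static void adopt_rebuilt(ctx *old_ctx, const ctx *new_ctx)
    {
        void *saved = old_ctx->code_buffer; /* keep the caller-owned pointer */

        /* free_old_ctx(old_ctx); -- everything reachable from it goes away */
        memcpy(old_ctx, new_ctx, sizeof(ctx));
        old_ctx->code_buffer = saved;       /* restore it after the copy */
    }
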
diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c
index 680e86c508657..996847d58a7e7 100644
--- a/ext/opcache/jit/ir/ir_sccp.c
+++ b/ext/opcache/jit/ir/ir_sccp.c
@@ -12,27 +12,48 @@
 #include "ir.h"
 #include "ir_private.h"
 
+#define IR_COMBO_COPY_PROPAGATION 1
+
 #define IR_TOP                  IR_UNUSED
 #define IR_BOTTOM               IR_LAST_OP
 
 #define IR_MAKE_TOP(ref)        do {IR_ASSERT(ref > 0); _values[ref].optx = IR_TOP;} while (0)
 #define IR_MAKE_BOTTOM(ref)     do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0)
 
-#define IR_IS_TOP(ref)          (ref >= 0 && _values[ref].optx == IR_TOP)
-#define IR_IS_BOTTOM(ref)       (ref >= 0 && _values[ref].optx == IR_BOTTOM)
-#define IR_IS_FEASIBLE(ref)     (ref >= 0 && _values[ref].optx != IR_TOP)
+#define IR_IS_TOP(ref)          (ref >= 0 && _values[ref].op == IR_TOP)
+#define IR_IS_BOTTOM(ref)       (ref >= 0 && _values[ref].op == IR_BOTTOM)
+#define IR_IS_REACHABLE(ref)    _ir_is_reachable_ctrl(ctx, _values, ref)
+#define IR_IS_CONST(ref)        (IR_IS_CONST_REF(ref) || IR_IS_CONST_OP(_values[ref].op))
 
-#define IR_COMBO_COPY_PROPAGATION 1
+IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(ir_ctx *ctx, ir_insn *_values, ir_ref ref)
+{
+	IR_ASSERT(!IR_IS_CONST_REF(ref));
+	IR_ASSERT(ir_op_flags[ctx->ir_base[ref].op] & IR_OP_FLAG_CONTROL);
+	return _values[ref].op != IR_TOP; /* BOTTOM, IF or MERGE */
+}
 
 #if IR_COMBO_COPY_PROPAGATION
-IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_insn *_values, ir_ref a)
+IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_ctx *ctx, ir_insn *_values, ir_ref a)
 {
 	if (a > 0 && _values[a].op == IR_COPY) {
-		a = _values[a].op1;
-		IR_ASSERT(a < 0 || _values[a].op != IR_COPY); /* this may be a copy of symbolic constant */
+		do {
+			a = _values[a].op1;
+		} while (a > 0 && _values[a].op == IR_COPY);
+		IR_ASSERT(a < 0 || _values[a].op == IR_BOTTOM);
+		IR_ASSERT(a > 0 || IR_IS_SYM_CONST(ctx->ir_base[a].op));
 	}
 	return a;
 }
+
+static void ir_sccp_add_identity(ir_ctx *ctx, ir_insn *_values, ir_ref src, ir_ref dst, ir_type type)
+{
+	IR_ASSERT(dst > 0 && _values[dst].op != IR_BOTTOM && _values[dst].op != IR_COPY);
+	IR_ASSERT((src > 0 && (_values[src].op == IR_BOTTOM || _values[src].op == IR_COPY))
+		|| (src < 0 && IR_IS_SYM_CONST(ctx->ir_base[src].op)));
+	IR_ASSERT(ir_sccp_identity(ctx, _values, src) != dst);
+	_values[dst].optx = IR_OPT(IR_COPY, type);
+	_values[dst].op1 = src;
+}
 #endif
 
 static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3)
@@ -40,9 +61,9 @@ static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t o
 	ir_insn *op1_insn, *op2_insn, *op3_insn, *insn;
 
 #if IR_COMBO_COPY_PROPAGATION
-	op1 = ir_sccp_identity(_values, op1);
-	op2 = ir_sccp_identity(_values, op2);
-	op3 = ir_sccp_identity(_values, op3);
+	op1 = ir_sccp_identity(ctx, _values, op1);
+	op2 = ir_sccp_identity(ctx, _values, op2);
+	op3 = ir_sccp_identity(ctx, _values, op3);
 #endif
 
 restart:
@@ -58,33 +79,31 @@ static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t o
 			op3 = ctx->fold_insn.op3;
 			goto restart;
 		case IR_FOLD_DO_EMIT:
-			IR_MAKE_BOTTOM(res);
-			return 1;
+			goto make_bottom;
 		case IR_FOLD_DO_COPY:
 			op1 = ctx->fold_insn.op1;
-#if IR_COMBO_COPY_PROPAGATION
-			op1 = ir_sccp_identity(_values, op1);
-#endif
 			insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ?
_values + op1 : ctx->ir_base + op1; if (IR_IS_CONST_OP(insn->op)) { /* pass */ -#if IR_COMBO_COPY_PROPAGATION - } else if (_values[res].optx == IR_TOP) { - _values[res].optx = IR_OPT(IR_COPY, insn->type); - _values[res].op1 = op1; - return 1; - } else if (_values[res].op == IR_COPY && _values[res].op1 == op1) { - return 0; /* not changed */ } else { - IR_ASSERT(_values[res].optx != IR_BOTTOM); - /* we don't check for widening */ - _values[res].optx = IR_OPT(IR_COPY, insn->type); - _values[res].op1 = op1; +#if IR_COMBO_COPY_PROPAGATION + if (_values[res].op == IR_TOP) { + /* pass to new copy */ + } else if (_values[res].op == IR_COPY) { + if (ir_sccp_identity(ctx, _values, _values[res].op1) == ir_sccp_identity(ctx, _values, op1)) { + return 0; /* not changed */ + } else { + goto make_bottom; + } + } else { + IR_ASSERT(_values[res].op != IR_BOTTOM); + /* we don't check for widening */ + } + /* create new COPY */ + ir_sccp_add_identity(ctx, _values, op1, res, insn->type); return 1; #else - } else { - IR_MAKE_BOTTOM(res); - return 1; + goto make_bottom; #endif } break; @@ -100,11 +119,13 @@ static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t o _values[res].optx = IR_OPT(insn->type, insn->type); _values[res].val.u64 = insn->val.u64; return 1; - } else if (_values[res].opt != IR_OPT(insn->type, insn->type) || _values[res].val.u64 != insn->val.u64) { - IR_MAKE_BOTTOM(res); - return 1; + } else if (_values[res].opt == IR_OPT(insn->type, insn->type) && _values[res].val.u64 == insn->val.u64) { + return 0; /* not changed */ } - return 0; /* not changed */ + +make_bottom: + IR_MAKE_BOTTOM(res); + return 1; } static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *insn, ir_bitqueue *worklist) @@ -112,14 +133,16 @@ static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i ir_ref j, n, input, *merge_input, *p; ir_insn *v, *new_const = NULL; #if IR_COMBO_COPY_PROPAGATION - ir_ref new_copy; + ir_ref new_copy = IR_UNUSED; + ir_ref new_copy_identity = IR_UNUSED; + ir_ref phi_identity = ir_sccp_identity(ctx, _values, i); #endif - if (!IR_IS_FEASIBLE(insn->op1)) { + if (!IR_IS_REACHABLE(insn->op1)) { return 0; } n = insn->inputs_count; - if (n > 3 && _values[i].optx == IR_TOP) { + if (n > 3 && _values[i].op == IR_TOP) { for (j = 0; j < (n>>2); j++) { _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ } @@ -129,7 +152,7 @@ static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i merge_input = ctx->ir_base[insn->op1].ops + 1; for (; --n > 0; p++, merge_input++) { IR_ASSERT(*merge_input > 0); - if (_values[*merge_input].optx == IR_TOP) { + if (!IR_IS_REACHABLE(*merge_input)) { continue; } @@ -150,25 +173,30 @@ static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i #if IR_COMBO_COPY_PROPAGATION } else if (v->op == IR_COPY) { input = v->op1; - IR_ASSERT(input < 0 || _values[input].op != IR_COPY); + new_copy_identity = ir_sccp_identity(ctx, _values, input); + if (new_copy_identity == phi_identity) { + new_copy_identity = IR_UNUSED; + continue; + } new_copy = input; goto next; +#endif } else if (v->op == IR_BOTTOM) { - new_copy = input; +#if IR_COMBO_COPY_PROPAGATION + if (input == phi_identity) { + continue; + } + new_copy = new_copy_identity = input; goto next; #else - } else if (v->op == IR_BOTTOM) { - IR_MAKE_BOTTOM(i); - return 1; + goto make_bottom; #endif } } - new_copy = IR_UNUSED; new_const = v; goto next; } - IR_ASSERT(_values[i].optx == IR_TOP); return 
0; next: @@ -177,12 +205,17 @@ static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i /* for all live merge inputs */ for (; --n > 0; p++, merge_input++) { IR_ASSERT(*merge_input > 0); - if (_values[*merge_input].optx == IR_TOP) { + if (!IR_IS_REACHABLE(*merge_input)) { continue; } input = *p; if (IR_IS_CONST_REF(input)) { +#if IR_COMBO_COPY_PROPAGATION + if (new_copy) { + goto make_bottom; + } +#endif v = &ctx->ir_base[input]; } else if (input == i) { continue; @@ -197,58 +230,55 @@ static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i continue; #if IR_COMBO_COPY_PROPAGATION } else if (v->op == IR_COPY) { - input = v->op1; - IR_ASSERT(input < 0 || _values[input].op != IR_COPY); - if (new_copy == input) { + ir_ref identity = ir_sccp_identity(ctx, _values, v->op1); + + if (identity == phi_identity || identity == new_copy_identity) { continue; - } else { - IR_MAKE_BOTTOM(i); - return 1; } + goto make_bottom; +#endif } else if (v->op == IR_BOTTOM) { - if (new_copy == input) { +#if IR_COMBO_COPY_PROPAGATION + if (input == phi_identity || input == new_copy_identity) { continue; - } else { - IR_MAKE_BOTTOM(i); - return 1; } -#else - } else if (v->op == IR_BOTTOM) { - IR_MAKE_BOTTOM(i); - return 1; #endif + goto make_bottom; } } if (!new_const || new_const->opt != v->opt || new_const->val.u64 != v->val.u64) { - IR_MAKE_BOTTOM(i); - return 1; + goto make_bottom; } } #if IR_COMBO_COPY_PROPAGATION if (new_copy) { - if (_values[i].op == IR_COPY && _values[i].op1 == new_copy) { - return 0; /* not changed */ + if (_values[i].op == IR_COPY) { + if (phi_identity == new_copy_identity) { + return 0; /* not changed */ + } else { + goto make_bottom; + } } else { - IR_ASSERT(_values[i].optx != IR_BOTTOM); + IR_ASSERT(_values[i].op != IR_BOTTOM); /* we don't check for widening */ - _values[i].optx = IR_OPT(IR_COPY, ctx->ir_base[new_copy].type); - _values[i].op1 = new_copy; + ir_sccp_add_identity(ctx, _values, new_copy, i, insn->type); return 1; } } #endif - if (_values[i].optx == IR_TOP) { + if (_values[i].op == IR_TOP) { _values[i].optx = new_const->opt; _values[i].val.u64 = new_const->val.u64; return 1; } else if (_values[i].opt == new_const->opt && _values[i].val.u64 == new_const->val.u64) { return 0; - } else { - IR_MAKE_BOTTOM(i); - return 1; } + +make_bottom: + IR_MAKE_BOTTOM(i); + return 1; } static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn *insn) @@ -285,25 +315,6 @@ static bool ir_is_dead(ir_ctx *ctx, ir_ref ref) return 0; } -static ir_ref ir_find1(ir_ctx *ctx, uint32_t optx, ir_ref op1) -{ - IR_ASSERT(!IR_IS_CONST_REF(op1)); - - ir_use_list *use_list = &ctx->use_lists[op1]; - ir_ref *p, n = use_list->count; - - for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) { - ir_ref use = *p; - ir_insn *use_insn = &ctx->ir_base[use]; - - if (use_insn->optx == optx) { - IR_ASSERT(use_insn->op1 == op1); - return use; - } - } - return IR_UNUSED; -} - static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a) { ir_insn *v = IR_IS_CONST_REF(a) ? 
&ctx->ir_base[a] : &_values[a]; @@ -321,154 +332,363 @@ static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b) return v1->val.u64 == v2->val.u64; } -static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref) +#ifdef IR_SCCP_TRACE +static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i) { - ir_ref j, n, *p; - ir_insn *insn; - - CLEAR_USES(ref); - insn = &ctx->ir_base[ref]; - n = insn->inputs_count; - insn->opt = IR_NOP; /* keep "inputs_count" */ - for (j = 1, p = insn->ops + j; j <= n; j++, p++) { - *p = IR_UNUSED; + if (IR_IS_BOTTOM(i)) { + fprintf(stderr, "BOTTOM"); + } else if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) { + fprintf(stderr, "CONST("); + ir_print_const(ctx, &_values[i], stderr, true); + fprintf(stderr, ")"); +#if IR_COMBO_COPY_PROPAGATION + } else if (_values[i].op == IR_COPY) { + fprintf(stderr, "COPY(%d)", _values[i].op1); +#endif + } else if (IR_IS_TOP(i)) { + fprintf(stderr, "TOP"); + } else if (_values[i].op == IR_IF) { + fprintf(stderr, "IF(%d)", _values[i].op1); + } else if (_values[i].op == IR_MERGE) { + fprintf(stderr, "MERGE(%d)", _values[i].op1); + } else { + fprintf(stderr, "%d", _values[i].op); } } -static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist) +static void ir_sccp_trace_start(ir_ctx *ctx, ir_insn *_values, ir_ref i) { - ir_ref j, n, *p; - ir_insn *insn; - - CLEAR_USES(ref); - insn = &ctx->ir_base[ref]; - n = insn->inputs_count; - insn->opt = IR_NOP; /* keep "inputs_count" */ - for (j = 1, p = insn->ops + j; j <= n; j++, p++) { - ir_ref input = *p; - *p = IR_UNUSED; - /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ - if (input > 0 && _values[input].op > IR_COPY) { - ir_use_list_remove_all(ctx, input, ref); - if (ir_is_dead(ctx, input)) { - /* schedule DCE */ - ir_bitqueue_add(worklist, input); - } - } - } + fprintf(stderr, "%d. ", i); + ir_sccp_trace_val(ctx, _values, i); } -static void ir_sccp_remove_insn2(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) +static void ir_sccp_trace_end(ir_ctx *ctx, ir_insn *_values, ir_ref i) { - ir_ref j, n, *p; - ir_insn *insn; - - CLEAR_USES(ref); - insn = &ctx->ir_base[ref]; - n = insn->inputs_count; - insn->opt = IR_NOP; /* keep "inputs_count" */ - for (j = 1, p = insn->ops + j; j <= n; j++, p++) { - ir_ref input = *p; - *p = IR_UNUSED; - if (input > 0) { - ir_use_list_remove_all(ctx, input, ref); - if (ir_is_dead(ctx, input)) { - /* schedule DCE */ - ir_bitqueue_add(worklist, input); - } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { - /* try to optimize PHI into ABS/MIN/MAX/COND */ - ir_bitqueue_add(worklist, ctx->ir_base[input].op1); - } - } - } + fprintf(stderr, " -> "); + ir_sccp_trace_val(ctx, _values, i); + fprintf(stderr, "\n"); } +#else +# define ir_sccp_trace_start(c, v, i) +# define ir_sccp_trace_end(c, v, i) +#endif -static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) +static void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist) { - ir_ref j, n, *p, use, i; - ir_insn *insn; + ir_ref i, j, n, *p, use; ir_use_list *use_list; + ir_insn *insn, *use_insn; + uint32_t flags; - IR_ASSERT(ref != new_ref); + /* A bit modified SCCP algorithm of M. N. Wegman and F. K. 
Zadeck */
+	worklist->pos = 0;
+	ir_bitset_incl(worklist->set, 1);
+	for (; (i = ir_bitqueue_pop(worklist)) >= 0; ir_sccp_trace_end(ctx, _values, i)) {
+		IR_ASSERT(_values[i].op != IR_BOTTOM);
+		ir_sccp_trace_start(ctx, _values, i);
+		insn = &ctx->ir_base[i];
+		flags = ir_op_flags[insn->op];
+		if (flags & IR_OP_FLAG_DATA) {
+			if (ctx->use_lists[i].count == 0) {
+				/* dead code */
+				continue;
+			} else if (insn->op == IR_PHI) {
+				if (!ir_sccp_meet_phi(ctx, _values, i, insn, worklist)) {
+					continue;
+				}
+			} else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) {
+				bool may_benefit = 0;
+				bool has_top = 0;
-	insn = &ctx->ir_base[ref];
-	n = insn->inputs_count;
-	insn->opt = IR_NOP; /* keep "inputs_count" */
-	for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
-		ir_ref input = *p;
-		*p = IR_UNUSED;
-		/* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
-		if (input > 0 && _values[input].op > IR_COPY) {
-			ir_use_list_remove_all(ctx, input, ref);
-			if (ir_is_dead(ctx, input)) {
-				/* schedule DCE */
-				ir_bitqueue_add(worklist, input);
-			}
-		}
-	}
+				if ((ctx->flags2 & IR_MEM2SSA_VARS) || _values[i].op != IR_TOP) {
+					may_benefit = 1;
+				}
-	use_list = &ctx->use_lists[ref];
-	n = use_list->count;
-	p = &ctx->use_edges[use_list->refs];
-	if (new_ref <= 0) {
-		/* constant or IR_UNUSED */
-		for (; n; p++, n--) {
-			use = *p;
-			/* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
-			if (_values[use].op > IR_COPY) {
-				insn = &ctx->ir_base[use];
-				i = ir_insn_find_op(insn, ref);
-				if (!i) continue;
-				IR_ASSERT(i > 0);
-				ir_insn_set_op(insn, i, new_ref);
-				/* schedule folding */
-				ir_bitqueue_add(worklist, use);
-			}
-		}
-	} else {
-		for (j = 0; j < n; j++, p++) {
-			use = *p;
-			/* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
-			if (_values[use].optx == IR_BOTTOM) {
-				insn = &ctx->ir_base[use];
-				i = ir_insn_find_op(insn, ref);
-				IR_ASSERT(i > 0);
-				ir_insn_set_op(insn, i, new_ref);
-				if (ir_use_list_add(ctx, new_ref, use)) {
-					/* restore after reallocation */
-					use_list = &ctx->use_lists[ref];
-					n = use_list->count;
-					p = &ctx->use_edges[use_list->refs + j];
+				IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
+				n = IR_INPUT_EDGES_COUNT(flags);
+				for (p = insn->ops + 1; n > 0; p++, n--) {
+					ir_ref input = *p;
+					if (input > 0) {
+						if (_values[input].op == IR_TOP) {
+							has_top = 1;
+							/* do backward propagation only once */
+							if (!_values[input].op1) {
+								_values[input].op1 = 1;
+								ir_bitqueue_add(worklist, input);
+							}
+						} else if (_values[input].op != IR_BOTTOM) {
+							/* Perform folding only if some of the direct inputs
+							 * are going to be replaced by a constant or copy.
+							 * This approach may miss some folding optimizations
+							 * dependent on indirect inputs, e.g. reassociation.
+ */ + may_benefit = 1; + } + } } - /* schedule folding */ - ir_bitqueue_add(worklist, use); + if (has_top) { + continue; + } + if (!may_benefit) { + IR_MAKE_BOTTOM(i); + if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC + || insn->op == IR_ZEXT || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) { + ir_bitqueue_add(iter_worklist, i); + } + } else if (!ir_sccp_fold(ctx, _values, i, insn->opt, insn->op1, insn->op2, insn->op3)) { + /* not changed */ + continue; + } else if (_values[i].op == IR_BOTTOM) { + insn = &ctx->ir_base[i]; + if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC + || insn->op == IR_ZEXT || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) { + ir_bitqueue_add(iter_worklist, i); + } + } + } else { + IR_MAKE_BOTTOM(i); } - } - } - CLEAR_USES(ref); -} - -static void ir_sccp_replace_insn2(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) -{ - ir_ref i, j, n, *p, use; - ir_insn *insn; - ir_use_list *use_list; - - IR_ASSERT(ref != new_ref); + } else if (flags & IR_OP_FLAG_BB_START) { + if (insn->op == IR_MERGE || insn->op == IR_BEGIN) { + ir_bitqueue_add(iter_worklist, i); + } + if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { + ir_ref unfeasible_inputs = 0; - insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + if (n > 3 && _values[i].op == IR_TOP) { + for (j = 0; j < (n>>2); j++) { + _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ + } + } + for (p = insn->ops + 1; n > 0; p++, n--) { + ir_ref input = *p; + IR_ASSERT(input > 0); + if (!IR_IS_REACHABLE(input)) { + unfeasible_inputs++; + } + } + if (unfeasible_inputs == 0) { + IR_MAKE_BOTTOM(i); + } else if (_values[i].op != IR_MERGE || _values[i].op1 != unfeasible_inputs) { + _values[i].optx = IR_MERGE; + _values[i].op1 = unfeasible_inputs; + } else { + continue; + } + } else { + IR_ASSERT(insn->op == IR_START || IR_IS_REACHABLE(insn->op1)); + IR_MAKE_BOTTOM(i); + } + } else { + IR_ASSERT(insn->op1 > 0); + if (!IR_IS_REACHABLE(insn->op1)) { + /* control inpt is not feasible */ + continue; + } + if (insn->op == IR_IF) { + if (IR_IS_TOP(insn->op2)) { + /* do backward propagaton only once */ + if (!_values[insn->op2].op1) { + _values[insn->op2].op1 = 1; + ir_bitqueue_add(worklist, insn->op2); + } + continue; + } + if (IR_IS_CONST(insn->op2)) { + bool b = ir_sccp_is_true(ctx, _values, insn->op2); + use_list = &ctx->use_lists[i]; + IR_ASSERT(use_list->count == 2); + p = &ctx->use_edges[use_list->refs]; + use = *p; + use_insn = &ctx->ir_base[use]; + IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE); + if ((use_insn->op == IR_IF_TRUE) != b) { + use = *(p+1); + IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE); + } + if (_values[i].op == IR_TOP) { + _values[i].optx = IR_IF; + _values[i].op1 = use; + ir_bitqueue_add(worklist, use); + continue; + } else if (_values[i].op == IR_IF && _values[i].op1 == use) { + continue; + } + } + IR_MAKE_BOTTOM(i); + } else if (insn->op == IR_SWITCH) { + if (IR_IS_TOP(insn->op2)) { + /* do backward propagaton only once */ + if (!_values[insn->op2].op1) { + _values[insn->op2].op1 = 1; + ir_bitqueue_add(worklist, insn->op2); + } + continue; + } + if (IR_IS_CONST(insn->op2)) { + ir_ref use_case = IR_UNUSED; + + use_list = &ctx->use_lists[i]; + n = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + IR_ASSERT(use > 0); + use_insn = &ctx->ir_base[use]; + if (use_insn->op == 
IR_CASE_VAL) { + if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) { + use_case = use; + break; + } + } else if (use_insn->op == IR_CASE_DEFAULT) { + use_case = use; + } + } + if (use_case) { + use_insn = &ctx->ir_base[use_case]; + if (_values[i].op == IR_TOP) { + _values[i].optx = IR_IF; + _values[i].op1 = use_case; + ir_bitqueue_add(worklist, use_case); + continue; + } else if (_values[i].op == IR_IF || _values[i].op1 == use_case) { + continue; + } + } + } + IR_MAKE_BOTTOM(i); + } else if (ir_is_dead_load_ex(ctx, i, flags, insn)) { + /* schedule dead load elimination */ + ir_bitqueue_add(iter_worklist, i); + IR_MAKE_BOTTOM(i); + } else { + IR_MAKE_BOTTOM(i); + + /* control, call, load and store instructions may have unprocessed inputs */ + n = IR_INPUT_EDGES_COUNT(flags); + if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) { + for (j = 0; j < (n>>2); j++) { + _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ + } + for (j = 2, p = insn->ops + j; j <= n; j++, p++) { + IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); + use = *p; + if (use > 0 && UNEXPECTED(_values[use].op == IR_TOP)) { + ir_bitqueue_add(worklist, use); + } + } + } else if (n >= 2) { + IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA); + use = insn->op2; + if (use > 0 && UNEXPECTED(_values[use].op == IR_TOP)) { + ir_bitqueue_add(worklist, use); + } + if (n > 2) { + IR_ASSERT(n == 3); + IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA); + use = insn->op3; + if (use > 0 && UNEXPECTED(_values[use].op == IR_TOP)) { + ir_bitqueue_add(worklist, use); + } + } + } + } + } + use_list = &ctx->use_lists[i]; + n = use_list->count; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + use = *p; + if (_values[use].op != IR_BOTTOM) { + ir_bitqueue_add(worklist, use); + } + } + } + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_SCCP) { + for (i = 1; i < ctx->insns_count; i++) { + if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) { + fprintf(stderr, "%d. CONST(", i); + ir_print_const(ctx, &_values[i], stderr, true); + fprintf(stderr, ")\n"); +#if IR_COMBO_COPY_PROPAGATION + } else if (_values[i].op == IR_COPY) { + fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1); +#endif + } else if (IR_IS_TOP(i)) { + fprintf(stderr, "%d. TOP\n", i); + } else if (_values[i].op == IR_IF) { + fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1); + } else if (_values[i].op == IR_MERGE) { + fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1); + } else if (!IR_IS_BOTTOM(i)) { + fprintf(stderr, "%d. 
%d\n", i, _values[i].op); + } + } + } +#endif +} + +/**********************/ +/* SCCP trasformation */ +/**********************/ + +static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref) +{ + ir_ref j, n, *p; + ir_insn *insn; + + CLEAR_USES(ref); + insn = &ctx->ir_base[ref]; n = insn->inputs_count; insn->opt = IR_NOP; /* keep "inputs_count" */ - for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + for (j = 1, p = insn->ops + j; j <= n; j++, p++) { + *p = IR_UNUSED; + } +} + +static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist) +{ + ir_ref j, n, *p; + ir_insn *insn; + + CLEAR_USES(ref); + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + j; j <= n; j++, p++) { ir_ref input = *p; *p = IR_UNUSED; - if (input > 0) { + /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ + if (input > 0 && _values[input].op > IR_COPY) { ir_use_list_remove_all(ctx, input, ref); if (ir_is_dead(ctx, input)) { /* schedule DCE */ ir_bitqueue_add(worklist, input); - } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { - /* try to optimize PHI into ABS/MIN/MAX/COND */ + } + } + } +} + +static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) +{ + ir_ref j, n, *p, use, i; + ir_insn *insn; + ir_use_list *use_list; + + IR_ASSERT(ref != new_ref); + + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ir_ref input = *p; + *p = IR_UNUSED; + /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ + if (input > 0 && _values[input].op > IR_COPY) { + ir_use_list_remove_all(ctx, input, ref); + if (ir_is_dead(ctx, input)) { + /* schedule DCE */ ir_bitqueue_add(worklist, input); } } @@ -481,126 +701,44 @@ static void ir_sccp_replace_insn2(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bi /* constant or IR_UNUSED */ for (; n; p++, n--) { use = *p; - IR_ASSERT(use != ref); - insn = &ctx->ir_base[use]; - i = ir_insn_find_op(insn, ref); - IR_ASSERT(i > 0); - ir_insn_set_op(insn, i, new_ref); - /* schedule folding */ - ir_bitqueue_add(worklist, use); + /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ + if (_values[use].op > IR_COPY) { + insn = &ctx->ir_base[use]; + i = ir_insn_find_op(insn, ref); + if (!i) continue; + IR_ASSERT(i > 0); + ir_insn_set_op(insn, i, new_ref); + /* schedule folding */ + ir_bitqueue_add(worklist, use); + } } } else { for (j = 0; j < n; j++, p++) { use = *p; - IR_ASSERT(use != ref); - insn = &ctx->ir_base[use]; - i = ir_insn_find_op(insn, ref); - IR_ASSERT(i > 0); - ir_insn_set_op(insn, i, new_ref); - if (ir_use_list_add(ctx, new_ref, use)) { - /* restore after reallocation */ - use_list = &ctx->use_lists[ref]; - n = use_list->count; - p = &ctx->use_edges[use_list->refs + j]; + /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ + if (_values[use].op == IR_BOTTOM) { + insn = &ctx->ir_base[use]; + i = ir_insn_find_op(insn, ref); + IR_ASSERT(i > 0); + ir_insn_set_op(insn, i, new_ref); + if (ir_use_list_add(ctx, new_ref, use)) { + /* restore after reallocation */ + use_list = &ctx->use_lists[ref]; + n = use_list->count; + p = &ctx->use_edges[use_list->refs + j]; + } + /* schedule folding */ + ir_bitqueue_add(worklist, use); } - /* schedule folding */ - ir_bitqueue_add(worklist, use); 
} } CLEAR_USES(ref); } -static void ir_sccp_fold2(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) +static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst) { - uint32_t opt; - ir_ref op1, op2, op3; - ir_insn *op1_insn, *op2_insn, *op3_insn, *insn; - - insn = &ctx->ir_base[ref]; - opt = insn->opt; - op1 = insn->op1; - op2 = insn->op2; - op3 = insn->op3; - -restart: - op1_insn = ctx->ir_base + op1; - op2_insn = ctx->ir_base + op2; - op3_insn = ctx->ir_base + op3; - - switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) { - case IR_FOLD_DO_RESTART: - opt = ctx->fold_insn.optx; - op1 = ctx->fold_insn.op1; - op2 = ctx->fold_insn.op2; - op3 = ctx->fold_insn.op3; - goto restart; - case IR_FOLD_DO_EMIT: - insn = &ctx->ir_base[ref]; - if (insn->opt != ctx->fold_insn.opt - || insn->op1 != ctx->fold_insn.op1 - || insn->op2 != ctx->fold_insn.op2 - || insn->op3 != ctx->fold_insn.op3) { - - ir_use_list *use_list; - ir_ref n, j, *p, use; - - insn->optx = ctx->fold_insn.opt; - IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK])); - insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]); - if (insn->op1 != ctx->fold_insn.op1) { - if (insn->op1 > 0) { - ir_use_list_remove_one(ctx, insn->op1, ref); - } - if (ctx->fold_insn.op1 > 0) { - ir_use_list_add(ctx, ctx->fold_insn.op1, ref); - } - } - if (insn->op2 != ctx->fold_insn.op2) { - if (insn->op2 > 0) { - ir_use_list_remove_one(ctx, insn->op2, ref); - } - if (ctx->fold_insn.op2 > 0) { - ir_use_list_add(ctx, ctx->fold_insn.op2, ref); - } - } - if (insn->op3 != ctx->fold_insn.op3) { - if (insn->op3 > 0) { - ir_use_list_remove_one(ctx, insn->op3, ref); - } - if (ctx->fold_insn.op3 > 0) { - ir_use_list_add(ctx, ctx->fold_insn.op3, ref); - } - } - insn->op1 = ctx->fold_insn.op1; - insn->op2 = ctx->fold_insn.op2; - insn->op3 = ctx->fold_insn.op3; - - use_list = &ctx->use_lists[ref]; - n = use_list->count; - for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { - use = *p; - ir_bitqueue_add(worklist, use); - } - } - break; - case IR_FOLD_DO_COPY: - op1 = ctx->fold_insn.op1; - ir_sccp_replace_insn2(ctx, ref, op1, worklist); - break; - case IR_FOLD_DO_CONST: - op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type); - ir_sccp_replace_insn2(ctx, ref, op1, worklist); - break; - default: - IR_ASSERT(0); - break; - } -} - -static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst) -{ - ir_ref next; - ir_insn *insn, *next_insn; + ir_ref next; + ir_insn *insn, *next_insn; insn = &ctx->ir_base[ref]; if (ctx->use_lists[dst].count == 1) { @@ -634,44 +772,71 @@ static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values /* remove MERGE completely */ for (j = 1; j <= n; j++) { ir_ref input = ir_insn_op(insn, j); - if (input && IR_IS_FEASIBLE(input)) { + if (input && IR_IS_REACHABLE(input)) { ir_insn *input_insn = &ctx->ir_base[input]; IR_ASSERT(input_insn->op == IR_END || input_insn->op == IR_LOOP_END|| input_insn->op == IR_IJMP || input_insn->op == IR_UNREACHABLE); if (input_insn->op == IR_END || input_insn->op == IR_LOOP_END) { - if (input < ref) { - ir_ref prev, next = IR_UNUSED; - ir_insn *next_insn = NULL; - - prev = input_insn->op1; - use_list = &ctx->use_lists[ref]; - if (use_list->count == 1) { - next = ctx->use_edges[use_list->refs]; - next_insn = &ctx->ir_base[next]; - } else { - for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) { - use = *p; - use_insn = &ctx->ir_base[use]; - 
IR_ASSERT((use_insn->op != IR_PHI) && "PHI must be already removed"); - if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) { - IR_ASSERT(!next); - next = use; - next_insn = use_insn; - } else if (use_insn->op != IR_NOP) { - IR_ASSERT(use_insn->op1 == ref); - IR_ASSERT(use_insn->op == IR_VAR); - ir_ref region = prev; - while (!IR_IS_BB_START(ctx->ir_base[region].op)) { - region = ctx->ir_base[region].op1; + ir_ref prev, next = IR_UNUSED; + ir_insn *next_insn = NULL; + + prev = input_insn->op1; + use_list = &ctx->use_lists[ref]; + if (use_list->count == 1) { + next = ctx->use_edges[use_list->refs]; + next_insn = &ctx->ir_base[next]; + } else { + k = 0; + p = &ctx->use_edges[use_list->refs]; + while (k < use_list->count) { + use = *p; + use_insn = &ctx->ir_base[use]; +#if IR_COMBO_COPY_PROPAGATION + IR_ASSERT((use_insn->op != IR_PHI) && "PHI must be already removed"); +#else + if (use_insn->op == IR_PHI) { + /* Convert PHI into COPY */ + ir_ref i, n = use_insn->inputs_count; + + for (i = 2; i <= n; i++) { + if (i != j + 1) { + ir_ref from = ir_insn_op(use_insn, i); + if (from > 0) { + ir_use_list_remove_one(ctx, from, use); + } + ir_insn_set_op(use_insn, i, IR_UNUSED); } - use_insn->op1 = region; - ir_use_list_add(ctx, region, use); - p = &ctx->use_edges[use_list->refs + k]; } + use_insn->optx = IR_OPTX(IR_COPY, use_insn->type, 1); + use_insn->op1 = ir_insn_op(use_insn, j + 1); + ir_insn_set_op(use_insn, j + 1, IR_UNUSED); + ir_use_list_remove_one(ctx, ref, use); + p = &ctx->use_edges[use_list->refs + k]; + continue; + } +#endif + if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) { + IR_ASSERT(!next); + next = use; + next_insn = use_insn; + } else if (use_insn->op != IR_NOP) { + IR_ASSERT(use_insn->op1 == ref); + IR_ASSERT(use_insn->op == IR_VAR); + ir_ref region = prev; + while (!IR_IS_BB_START(ctx->ir_base[region].op)) { + region = ctx->ir_base[region].op1; + } + use_insn->op1 = region; + ir_use_list_add(ctx, region, use); + p = &ctx->use_edges[use_list->refs + k]; } + k++; + p++; } - IR_ASSERT(prev && next); + } + IR_ASSERT(prev && next); + if (prev < next) { /* remove MERGE and input END from double linked control list */ next_insn->op1 = prev; ir_use_list_replace_one(ctx, prev, input, next); @@ -752,236 +917,325 @@ static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values } } -static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref) +static void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist) { - ir_insn *insn = &ctx->ir_base[ref]; + ir_ref i, j; + ir_insn *value; - IR_ASSERT(insn->type == IR_DOUBLE); - if (IR_IS_CONST_REF(ref)) { - return !IR_IS_SYM_CONST(insn->op) && insn->val.d == (double)(float)insn->val.d; - } else { - switch (insn->op) { - case IR_FP2FP: - return 1; -// case IR_INT2FP: -// return ctx->use_lists[ref].count == 1; - case IR_NEG: - case IR_ABS: - return ctx->use_lists[ref].count == 1 && - ir_may_promote_d2f(ctx, insn->op1); - case IR_ADD: - case IR_SUB: - case IR_MUL: - case IR_DIV: - case IR_MIN: - case IR_MAX: - return ctx->use_lists[ref].count == 1 && - ir_may_promote_d2f(ctx, insn->op1) && - ir_may_promote_d2f(ctx, insn->op2); - default: - break; + for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) { + if (value->op == IR_BOTTOM) { + continue; + } else if (IR_IS_CONST_OP(value->op)) { + /* replace instruction by constant */ + j = ir_const(ctx, value->val, value->type); + ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist); + } else if (IR_IS_SYM_CONST(value->op)) { + 
/* replace instruction by constant */ + j = ir_const_ex(ctx, value->val, value->type, value->optx); + ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist); +#if IR_COMBO_COPY_PROPAGATION + } else if (value->op == IR_COPY) { + ir_sccp_replace_insn(ctx, _values, i, value->op1, iter_worklist); +#endif + } else if (value->op == IR_TOP) { + /* remove unreachable instruction */ + ir_insn *insn = &ctx->ir_base[i]; + + if (insn->op == IR_NOP) { + /* already removed */ + } else if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) { + if (insn->op != IR_PARAM && (insn->op != IR_VAR || _values[insn->op1].op == IR_TOP)) { + ir_sccp_remove_insn(ctx, _values, i, iter_worklist); + } + } else { + if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) { + /* remove from terminators list */ + ir_ref prev = ctx->ir_base[1].op1; + if (prev == i) { + ctx->ir_base[1].op1 = insn->op3; + } else { + while (prev) { + if (ctx->ir_base[prev].op3 == i) { + ctx->ir_base[prev].op3 = insn->op3; + break; + } + prev = ctx->ir_base[prev].op3; + } + } + } + ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, iter_worklist); + } + } else if (value->op == IR_IF) { + /* remove one way IF/SWITCH */ + ir_sccp_remove_if(ctx, _values, i, value->op1); + } else if (value->op == IR_MERGE) { + /* schedule merge to remove unfeasible MERGE inputs */ + ir_bitqueue_add(worklist, i); } } - return 0; + + while ((i = ir_bitqueue_pop(worklist)) >= 0) { + IR_ASSERT(_values[i].op == IR_MERGE); + ir_sccp_remove_unfeasible_merge_inputs(ctx, _values, i, _values[i].op1); + } } -static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref) +/***************************/ +/* Iterative Optimizations */ +/***************************/ + +static ir_ref ir_find1(ir_ctx *ctx, uint32_t optx, ir_ref op1) { - ir_insn *insn = &ctx->ir_base[ref]; + IR_ASSERT(!IR_IS_CONST_REF(op1)); - IR_ASSERT(insn->type == IR_FLOAT); - if (IR_IS_CONST_REF(ref)) { - return !IR_IS_SYM_CONST(insn->op) && insn->val.f == (float)(double)insn->val.f; - } else { - switch (insn->op) { - case IR_FP2FP: - return 1; - case IR_INT2FP: - return ctx->use_lists[ref].count == 1; - case IR_NEG: - case IR_ABS: - return ctx->use_lists[ref].count == 1 && - ir_may_promote_f2d(ctx, insn->op1); - case IR_ADD: - case IR_SUB: - case IR_MUL: -// case IR_DIV: - case IR_MIN: - case IR_MAX: - return ctx->use_lists[ref].count == 1 && - ir_may_promote_f2d(ctx, insn->op1) && - ir_may_promote_f2d(ctx, insn->op2); - default: - break; + ir_use_list *use_list = &ctx->use_lists[op1]; + ir_ref *p, n = use_list->count; + + for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) { + ir_ref use = *p; + ir_insn *use_insn = &ctx->ir_base[use]; + + if (use_insn->optx == optx) { + IR_ASSERT(use_insn->op1 == op1); + return use; } } - return 0; + return IR_UNUSED; } -static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use) +static void ir_iter_remove_insn(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) { - ir_insn *insn = &ctx->ir_base[ref]; - uint32_t count; + ir_ref j, n, *p; + ir_insn *insn; - IR_ASSERT(insn->type == IR_DOUBLE); - if (IR_IS_CONST_REF(ref)) { - return ir_const_float(ctx, (float)insn->val.d); - } else { - switch (insn->op) { - case IR_FP2FP: - count = ctx->use_lists[ref].count; - ir_use_list_remove_all(ctx, ref, use); - if (ctx->use_lists[ref].count == 0) { - ir_use_list_replace_one(ctx, insn->op1, ref, use); - if (count > 1) { - do { - ir_use_list_add(ctx, insn->op1, use); - } while (--count > 1); - } - ref = insn->op1; - MAKE_NOP(insn); - return ref; - } else { - ir_use_list_add(ctx, insn->op1, 
use); - count -= ctx->use_lists[ref].count; - if (count > 1) { - do { - ir_use_list_add(ctx, insn->op1, use); - } while (--count > 1); - } - } - return insn->op1; -// case IR_INT2FP: -// insn->type = IR_FLOAT; -// return ref; - case IR_NEG: - case IR_ABS: - insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); - insn->type = IR_FLOAT; - return ref; - case IR_ADD: - case IR_SUB: - case IR_MUL: - case IR_DIV: - case IR_MIN: - case IR_MAX: - if (insn->op1 == insn->op2) { - insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); - } else { - insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); - insn->op2 = ir_promote_d2f(ctx, insn->op2, ref); - } - insn->type = IR_FLOAT; - return ref; - default: - break; + CLEAR_USES(ref); + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + j; j <= n; j++, p++) { + ir_ref input = *p; + *p = IR_UNUSED; + if (input > 0) { + ir_use_list_remove_all(ctx, input, ref); + if (ir_is_dead(ctx, input)) { + /* schedule DCE */ + ir_bitqueue_add(worklist, input); + } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { + /* try to optimize PHI into ABS/MIN/MAX/COND */ + ir_bitqueue_add(worklist, ctx->ir_base[input].op1); + } } } - IR_ASSERT(0); - return ref; } -static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use) +static void ir_iter_replace_insn(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) { - ir_insn *insn = &ctx->ir_base[ref]; - uint32_t count; - ir_ref old_ref; + ir_ref i, j, n, *p, use; + ir_insn *insn; + ir_use_list *use_list; - IR_ASSERT(insn->type == IR_FLOAT); - if (IR_IS_CONST_REF(ref)) { - return ir_const_double(ctx, (double)insn->val.f); + IR_ASSERT(ref != new_ref); + + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ir_ref input = *p; + *p = IR_UNUSED; + if (input > 0) { + ir_use_list_remove_all(ctx, input, ref); + if (ir_is_dead(ctx, input)) { + /* schedule DCE */ + ir_bitqueue_add(worklist, input); + } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { + /* try to optimize PHI into ABS/MIN/MAX/COND */ + ir_bitqueue_add(worklist, input); + } + } + } + + use_list = &ctx->use_lists[ref]; + n = use_list->count; + p = &ctx->use_edges[use_list->refs]; + if (new_ref <= 0) { + /* constant or IR_UNUSED */ + for (; n; p++, n--) { + use = *p; + IR_ASSERT(use != ref); + insn = &ctx->ir_base[use]; + i = ir_insn_find_op(insn, ref); + IR_ASSERT(i > 0); + ir_insn_set_op(insn, i, new_ref); + /* schedule folding */ + ir_bitqueue_add(worklist, use); + } } else { - switch (insn->op) { - case IR_FP2FP: - count = ctx->use_lists[ref].count; - ir_use_list_remove_all(ctx, ref, use); - if (ctx->use_lists[ref].count == 0) { - ir_use_list_replace_one(ctx, insn->op1, ref, use); - if (count > 1) { - do { - ir_use_list_add(ctx, insn->op1, use); - } while (--count > 1); + for (j = 0; j < n; j++, p++) { + use = *p; + IR_ASSERT(use != ref); + insn = &ctx->ir_base[use]; + i = ir_insn_find_op(insn, ref); + IR_ASSERT(i > 0); + ir_insn_set_op(insn, i, new_ref); + if (ir_use_list_add(ctx, new_ref, use)) { + /* restore after reallocation */ + use_list = &ctx->use_lists[ref]; + n = use_list->count; + p = &ctx->use_edges[use_list->refs + j]; + } + /* schedule folding */ + ir_bitqueue_add(worklist, use); + } + } + CLEAR_USES(ref); +} + +static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) +{ + uint32_t opt; + 
ir_ref op1, op2, op3; + ir_insn *op1_insn, *op2_insn, *op3_insn, *insn; + + insn = &ctx->ir_base[ref]; + opt = insn->opt; + op1 = insn->op1; + op2 = insn->op2; + op3 = insn->op3; + +restart: + op1_insn = ctx->ir_base + op1; + op2_insn = ctx->ir_base + op2; + op3_insn = ctx->ir_base + op3; + + switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) { + case IR_FOLD_DO_RESTART: + opt = ctx->fold_insn.optx; + op1 = ctx->fold_insn.op1; + op2 = ctx->fold_insn.op2; + op3 = ctx->fold_insn.op3; + goto restart; + case IR_FOLD_DO_EMIT: + insn = &ctx->ir_base[ref]; + if (insn->opt != ctx->fold_insn.opt + || insn->op1 != ctx->fold_insn.op1 + || insn->op2 != ctx->fold_insn.op2 + || insn->op3 != ctx->fold_insn.op3) { + + ir_use_list *use_list; + ir_ref n, j, *p, use; + + insn->optx = ctx->fold_insn.opt; + IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK])); + insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]); + if (insn->op1 != ctx->fold_insn.op1) { + if (insn->op1 > 0) { + ir_use_list_remove_one(ctx, insn->op1, ref); } - ref = insn->op1; - MAKE_NOP(insn); - return ref; - } else { - ir_use_list_add(ctx, insn->op1, use); - count -= ctx->use_lists[ref].count; - if (count > 1) { - do { - ir_use_list_add(ctx, insn->op1, use); - } while (--count > 1); + if (ctx->fold_insn.op1 > 0) { + ir_use_list_add(ctx, ctx->fold_insn.op1, ref); } } - return insn->op1; - case IR_INT2FP: - old_ref = ir_find1(ctx, IR_OPTX(IR_INT2FP, IR_DOUBLE, 1), insn->op1); - if (old_ref) { - IR_ASSERT(ctx->use_lists[ref].count == 1); - ir_use_list_remove_one(ctx, insn->op1, ref); - CLEAR_USES(ref); - MAKE_NOP(insn); - ir_use_list_add(ctx, old_ref, use); - return old_ref; + if (insn->op2 != ctx->fold_insn.op2) { + if (insn->op2 > 0) { + ir_use_list_remove_one(ctx, insn->op2, ref); + } + if (ctx->fold_insn.op2 > 0) { + ir_use_list_add(ctx, ctx->fold_insn.op2, ref); + } } - insn->type = IR_DOUBLE; - return ref; + if (insn->op3 != ctx->fold_insn.op3) { + if (insn->op3 > 0) { + ir_use_list_remove_one(ctx, insn->op3, ref); + } + if (ctx->fold_insn.op3 > 0) { + ir_use_list_add(ctx, ctx->fold_insn.op3, ref); + } + } + insn->op1 = ctx->fold_insn.op1; + insn->op2 = ctx->fold_insn.op2; + insn->op3 = ctx->fold_insn.op3; + + use_list = &ctx->use_lists[ref]; + n = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + ir_bitqueue_add(worklist, use); + } + } + break; + case IR_FOLD_DO_COPY: + op1 = ctx->fold_insn.op1; + ir_iter_replace_insn(ctx, ref, op1, worklist); + break; + case IR_FOLD_DO_CONST: + op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type); + ir_iter_replace_insn(ctx, ref, op1, worklist); + break; + default: + IR_ASSERT(0); + break; + } +} + +static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref) +{ + ir_insn *insn = &ctx->ir_base[ref]; + + IR_ASSERT(insn->type == IR_DOUBLE); + if (IR_IS_CONST_REF(ref)) { + return !IR_IS_SYM_CONST(insn->op) && insn->val.d == (double)(float)insn->val.d; + } else { + switch (insn->op) { + case IR_FP2FP: + return 1; +// case IR_INT2FP: +// return ctx->use_lists[ref].count == 1; case IR_NEG: case IR_ABS: - insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); - insn->type = IR_DOUBLE; - return ref; + return ctx->use_lists[ref].count == 1 && + ir_may_promote_d2f(ctx, insn->op1); case IR_ADD: case IR_SUB: case IR_MUL: -// case IR_DIV: + case IR_DIV: case IR_MIN: case IR_MAX: - if (insn->op1 == insn->op2) { - insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); - } else { - insn->op1 = 
ir_promote_f2d(ctx, insn->op1, ref); - insn->op2 = ir_promote_f2d(ctx, insn->op2, ref); - } - insn->type = IR_DOUBLE; - return ref; + return ctx->use_lists[ref].count == 1 && + ir_may_promote_d2f(ctx, insn->op1) && + ir_may_promote_d2f(ctx, insn->op2); default: break; } } - IR_ASSERT(0); - return ref; + return 0; } -static bool ir_may_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref) +static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref) { ir_insn *insn = &ctx->ir_base[ref]; + IR_ASSERT(insn->type == IR_FLOAT); if (IR_IS_CONST_REF(ref)) { - return !IR_IS_SYM_CONST(insn->op); + return !IR_IS_SYM_CONST(insn->op) && insn->val.f == (float)(double)insn->val.f; } else { switch (insn->op) { - case IR_ZEXT: - case IR_SEXT: - return ctx->ir_base[insn->op1].type == type; + case IR_FP2FP: + return 1; + case IR_INT2FP: + return ctx->use_lists[ref].count == 1; case IR_NEG: case IR_ABS: - case IR_NOT: return ctx->use_lists[ref].count == 1 && - ir_may_promote_i2i(ctx, type, insn->op1); + ir_may_promote_f2d(ctx, insn->op1); case IR_ADD: case IR_SUB: case IR_MUL: // case IR_DIV: case IR_MIN: case IR_MAX: - case IR_OR: - case IR_AND: - case IR_XOR: return ctx->use_lists[ref].count == 1 && - ir_may_promote_i2i(ctx, type, insn->op1) && - ir_may_promote_i2i(ctx, type, insn->op2); + ir_may_promote_f2d(ctx, insn->op1) && + ir_may_promote_f2d(ctx, insn->op2); default: break; } @@ -989,7 +1243,178 @@ static bool ir_may_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref) return 0; } -static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use) +static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use) +{ + ir_insn *insn = &ctx->ir_base[ref]; + uint32_t count; + + IR_ASSERT(insn->type == IR_DOUBLE); + if (IR_IS_CONST_REF(ref)) { + return ir_const_float(ctx, (float)insn->val.d); + } else { + switch (insn->op) { + case IR_FP2FP: + count = ctx->use_lists[ref].count; + ir_use_list_remove_all(ctx, ref, use); + if (ctx->use_lists[ref].count == 0) { + ir_use_list_replace_one(ctx, insn->op1, ref, use); + if (count > 1) { + do { + ir_use_list_add(ctx, insn->op1, use); + } while (--count > 1); + } + ref = insn->op1; + MAKE_NOP(insn); + return ref; + } else { + ir_use_list_add(ctx, insn->op1, use); + count -= ctx->use_lists[ref].count; + if (count > 1) { + do { + ir_use_list_add(ctx, insn->op1, use); + } while (--count > 1); + } + } + return insn->op1; +// case IR_INT2FP: +// insn->type = IR_FLOAT; +// return ref; + case IR_NEG: + case IR_ABS: + insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); + insn->type = IR_FLOAT; + return ref; + case IR_ADD: + case IR_SUB: + case IR_MUL: + case IR_DIV: + case IR_MIN: + case IR_MAX: + if (insn->op1 == insn->op2) { + insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); + } else { + insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); + insn->op2 = ir_promote_d2f(ctx, insn->op2, ref); + } + insn->type = IR_FLOAT; + return ref; + default: + break; + } + } + IR_ASSERT(0); + return ref; +} + +static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use) +{ + ir_insn *insn = &ctx->ir_base[ref]; + uint32_t count; + ir_ref old_ref; + + IR_ASSERT(insn->type == IR_FLOAT); + if (IR_IS_CONST_REF(ref)) { + return ir_const_double(ctx, (double)insn->val.f); + } else { + switch (insn->op) { + case IR_FP2FP: + count = ctx->use_lists[ref].count; + ir_use_list_remove_all(ctx, ref, use); + if (ctx->use_lists[ref].count == 0) { + ir_use_list_replace_one(ctx, insn->op1, ref, use); + if (count > 1) { + do { + ir_use_list_add(ctx, insn->op1, use); + } while (--count > 1); + } 
+ ref = insn->op1; + MAKE_NOP(insn); + return ref; + } else { + ir_use_list_add(ctx, insn->op1, use); + count -= ctx->use_lists[ref].count; + if (count > 1) { + do { + ir_use_list_add(ctx, insn->op1, use); + } while (--count > 1); + } + } + return insn->op1; + case IR_INT2FP: + old_ref = ir_find1(ctx, IR_OPTX(IR_INT2FP, IR_DOUBLE, 1), insn->op1); + if (old_ref) { + IR_ASSERT(ctx->use_lists[ref].count == 1); + ir_use_list_remove_one(ctx, insn->op1, ref); + CLEAR_USES(ref); + MAKE_NOP(insn); + ir_use_list_add(ctx, old_ref, use); + return old_ref; + } + insn->type = IR_DOUBLE; + return ref; + case IR_NEG: + case IR_ABS: + insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); + insn->type = IR_DOUBLE; + return ref; + case IR_ADD: + case IR_SUB: + case IR_MUL: +// case IR_DIV: + case IR_MIN: + case IR_MAX: + if (insn->op1 == insn->op2) { + insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); + } else { + insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); + insn->op2 = ir_promote_f2d(ctx, insn->op2, ref); + } + insn->type = IR_DOUBLE; + return ref; + default: + break; + } + } + IR_ASSERT(0); + return ref; +} + +static bool ir_may_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref) +{ + ir_insn *insn = &ctx->ir_base[ref]; + + if (IR_IS_CONST_REF(ref)) { + return !IR_IS_SYM_CONST(insn->op); + } else { + switch (insn->op) { + case IR_ZEXT: + case IR_SEXT: + return ctx->ir_base[insn->op1].type == type; + case IR_NEG: + case IR_ABS: + case IR_NOT: + return ctx->use_lists[ref].count == 1 && + ir_may_promote_i2i(ctx, type, insn->op1); + case IR_ADD: + case IR_SUB: + case IR_MUL: +// case IR_DIV: + case IR_MIN: + case IR_MAX: + case IR_OR: + case IR_AND: + case IR_XOR: + return ctx->use_lists[ref].count == 1 && + ir_may_promote_i2i(ctx, type, insn->op1) && + ir_may_promote_i2i(ctx, type, insn->op2); + default: + break; + } + } + return 0; +} + +static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use) { ir_insn *insn = &ctx->ir_base[ref]; uint32_t count; @@ -1201,7 +1626,7 @@ static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bi } } - ir_sccp_replace_insn2(ctx, ext_ref, ref, worklist); + ir_iter_replace_insn(ctx, ext_ref, ref, worklist); phi_insn = &ctx->ir_base[ref]; if (IR_IS_CONST_REF(phi_insn->op2) @@ -2061,495 +2486,171 @@ static bool ir_try_split_if_cmp(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqu } else { /* IF Split * - * | | | | - * | END | END - * END / END | - * | +---+ | | - * | / | | - * MERGE | | - * | \ | | - * | PHI(C1, X) | | - * | | | + - * | CMP(_, C2) | / - * | / | / - * IF => | / - * | \ | / - * | +------+ | / - * | IF_TRUE | / BEGIN(unreachable) - * IF_FALSE | MERGE | - * | | - */ - - ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref); - ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref); - - MAKE_NOP(merge); CLEAR_USES(merge_ref); - MAKE_NOP(phi); CLEAR_USES(phi_ref); - MAKE_NOP(cond); CLEAR_USES(cond_ref); - MAKE_NOP(insn); CLEAR_USES(ref); - - if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); - if_false->op1 = end1_ref; - if_false->op2 = end2_ref; - - if_true->optx = IR_BEGIN; - if_true->op1 = IR_UNUSED; - - ctx->flags2 &= ~IR_CFG_REACHABLE; - - ir_bitqueue_add(worklist, if_false_ref); - - return 1; - } - } else { - /* IF Split - * - * | | | | - * | END | IF<----+ - * END / END / \ | - * | +---+ | +--+ + | - * | / | / | | - * MERGE | IF_FALSE | | - * | \ | | | | - * | PHI(C1, X) | | | | - * | | | | | | - * | CMP(_, C2) | | | CMP(X, C2) - * | / | | | - * IF => | END | - * | \ | | | - * | +------+ | | | - 
* | IF_TRUE | | IF_TRUE - * IF_FALSE | MERGE - * | | - */ - - ir_use_list_remove_all(ctx, merge_ref, phi_ref); - ir_use_list_remove_all(ctx, ref, if_true_ref); - if (!IR_IS_CONST_REF(phi->op3)) { - ir_use_list_replace_one(ctx, phi->op3, phi_ref, insn->op2); - } - ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref); - ir_use_list_replace_one(ctx, cond_ref, ref, end2_ref); - ir_use_list_add(ctx, end2_ref, if_true_ref); - - end2->optx = IR_OPTX(IR_IF, IR_VOID, 2); - end2->op2 = insn->op2; - - merge->optx = IR_OPTX(op, IR_VOID, 1); - merge->op1 = end2_ref; - merge->op2 = IR_UNUSED; - - cond->op1 = phi->op3; - MAKE_NOP(phi); - CLEAR_USES(phi_ref); - - insn->optx = IR_OPTX(IR_END, IR_VOID, 1); - insn->op1 = merge_ref; - insn->op2 = IR_UNUSED; - - if_true->op1 = end2_ref; - - if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); - if_false->op1 = end1_ref; - if_false->op2 = ref; - - ir_bitqueue_add(worklist, if_false_ref); - if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) { - ir_bitqueue_add(worklist, end2->op1); - } - - return 1; - } - } - } - } - } - - return 0; -} - -static void ir_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_bitqueue *worklist) -{ - ir_use_list *use_list = &ctx->use_lists[merge_ref]; - - if (use_list->count == 1) { - ir_try_remove_empty_diamond(ctx, merge_ref, merge, worklist); - } else if (use_list->count == 2) { - if (merge->inputs_count == 2) { - ir_ref phi_ref = ctx->use_edges[use_list->refs]; - ir_insn *phi = &ctx->ir_base[phi_ref]; - - ir_ref next_ref = ctx->use_edges[use_list->refs + 1]; - ir_insn *next = &ctx->ir_base[next_ref]; - - if (next->op == IR_PHI) { - SWAP_REFS(phi_ref, next_ref); - SWAP_INSNS(phi, next); - } - - if (phi->op == IR_PHI && next->op != IR_PHI) { - if (next->op == IR_IF && next->op1 == merge_ref && ctx->use_lists[phi_ref].count == 1) { - if (next->op2 == phi_ref) { - if (ir_try_split_if(ctx, next_ref, next, worklist)) { - return; - } - } else { - ir_insn *cmp = &ctx->ir_base[next->op2]; - - if (cmp->op >= IR_EQ && cmp->op <= IR_UGT - && cmp->op1 == phi_ref - && IR_IS_CONST_REF(cmp->op2) - && !IR_IS_SYM_CONST(ctx->ir_base[cmp->op2].op) - && ctx->use_lists[next->op2].count == 1) { - if (ir_try_split_if_cmp(ctx, next_ref, next, worklist)) { - return; - } - } - } - } - ir_optimize_phi(ctx, merge_ref, merge, phi_ref, phi, worklist); - } - } - } -} - -int ir_sccp(ir_ctx *ctx) -{ - ir_ref i, j, n, *p, use; - ir_use_list *use_list; - ir_insn *insn, *use_insn, *value; - uint32_t flags; - ir_bitqueue worklist, worklist2; - ir_insn *_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn)); - - ctx->flags2 |= IR_OPT_IN_SCCP; - - /* A bit modified SCCP algorithm of M. N. Wegman and F. K. 
Zadeck */ - ir_bitqueue_init(&worklist2, ctx->insns_count); - ir_bitqueue_init(&worklist, ctx->insns_count); - worklist.pos = 0; - ir_bitset_incl(worklist.set, 1); - while ((i = ir_bitqueue_pop(&worklist)) >= 0) { - insn = &ctx->ir_base[i]; - flags = ir_op_flags[insn->op]; - if (flags & IR_OP_FLAG_DATA) { - if (ctx->use_lists[i].count == 0) { - /* dead code */ - continue; - } else if (insn->op == IR_PHI) { - if (!ir_sccp_meet_phi(ctx, _values, i, insn, &worklist)) { - continue; - } - } else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) { - bool may_benefit = 0; - bool has_top = 0; - - IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags)); - n = IR_INPUT_EDGES_COUNT(flags); - for (p = insn->ops + 1; n > 0; p++, n--) { - ir_ref input = *p; - if (input > 0) { - if (_values[input].optx == IR_TOP) { - has_top = 1; - /* do backward propagaton only once */ - if (!_values[input].op1) { - _values[input].op1 = 1; - ir_bitqueue_add(&worklist, input); - } - } else if (_values[input].optx != IR_BOTTOM) { - /* Perform folding only if some of direct inputs - * is going to be replaced by a constant or copy. - * This approach may miss some folding optimizations - * dependent on indirect inputs. e.g. reassociation. - */ - may_benefit = 1; - } - } - } - if (has_top) { - continue; - } - if (!may_benefit) { - IR_MAKE_BOTTOM(i); - if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC - || insn->op == IR_ZEXT || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) { - ir_bitqueue_add(&worklist2, i); - } - } else if (!ir_sccp_fold(ctx, _values, i, insn->opt, insn->op1, insn->op2, insn->op3)) { - /* not changed */ - continue; - } else if (_values[i].optx == IR_BOTTOM) { - insn = &ctx->ir_base[i]; - if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC - || insn->op == IR_ZEXT || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) { - ir_bitqueue_add(&worklist2, i); - } - } - } else { - IR_MAKE_BOTTOM(i); - } - } else if (flags & IR_OP_FLAG_BB_START) { - if (insn->op == IR_MERGE || insn->op == IR_BEGIN) { - ir_bitqueue_add(&worklist2, i); - } - if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { - ir_ref unfeasible_inputs = 0; - - n = insn->inputs_count; - if (n > 3 && _values[i].optx == IR_TOP) { - for (j = 0; j < (n>>2); j++) { - _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ - } - } - for (p = insn->ops + 1; n > 0; p++, n--) { - ir_ref input = *p; - IR_ASSERT(input > 0); - if (_values[input].optx == IR_TOP) { - unfeasible_inputs++; - } - } - if (unfeasible_inputs == 0) { - IR_MAKE_BOTTOM(i); - } else if (_values[i].op1 != unfeasible_inputs) { - _values[i].optx = IR_MERGE; - _values[i].op1 = unfeasible_inputs; - } else { - continue; - } - } else { - IR_ASSERT(insn->op == IR_START || IR_IS_FEASIBLE(insn->op1)); - IR_MAKE_BOTTOM(i); - } - } else { - IR_ASSERT(insn->op1 > 0); - if (_values[insn->op1].optx == IR_TOP) { - /* control inpt is not feasible */ - continue; - } - if (insn->op == IR_IF) { - if (IR_IS_TOP(insn->op2)) { - /* do backward propagaton only once */ - if (!_values[insn->op2].op1) { - _values[insn->op2].op1 = 1; - ir_bitqueue_add(&worklist, insn->op2); - } - continue; - } - if (!IR_IS_BOTTOM(insn->op2) -#if IR_COMBO_COPY_PROPAGATION - && (IR_IS_CONST_REF(insn->op2) || _values[insn->op2].op != IR_COPY) -#endif - ) { - bool b = ir_sccp_is_true(ctx, _values, insn->op2); - use_list = &ctx->use_lists[i]; - IR_ASSERT(use_list->count == 2); - p = &ctx->use_edges[use_list->refs]; - use = *p; - use_insn = 
&ctx->ir_base[use]; - IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE); - if ((use_insn->op == IR_IF_TRUE) != b) { - use = *(p+1); - IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE); - } - if (_values[i].optx == IR_TOP) { - _values[i].optx = IR_IF; - _values[i].op1 = use; - } else if (_values[i].optx != IR_IF || _values[i].op1 != use) { - IR_MAKE_BOTTOM(i); - } - if (!IR_IS_BOTTOM(use)) { - ir_bitqueue_add(&worklist, use); - } - continue; - } - IR_MAKE_BOTTOM(i); - } else if (insn->op == IR_SWITCH) { - if (IR_IS_TOP(insn->op2)) { - /* do backward propagaton only once */ - if (!_values[insn->op2].op1) { - _values[insn->op2].op1 = 1; - ir_bitqueue_add(&worklist, insn->op2); - } - continue; - } - if (!IR_IS_BOTTOM(insn->op2) -#if IR_COMBO_COPY_PROPAGATION - && (IR_IS_CONST_REF(insn->op2) || _values[insn->op2].op != IR_COPY) -#endif - ) { - ir_ref use_case = IR_UNUSED; + * | | | | + * | END | END + * END / END | + * | +---+ | | + * | / | | + * MERGE | | + * | \ | | + * | PHI(C1, X) | | + * | | | + + * | CMP(_, C2) | / + * | / | / + * IF => | / + * | \ | / + * | +------+ | / + * | IF_TRUE | / BEGIN(unreachable) + * IF_FALSE | MERGE | + * | | + */ - use_list = &ctx->use_lists[i]; - n = use_list->count; - for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { - use = *p; - IR_ASSERT(use > 0); - use_insn = &ctx->ir_base[use]; - if (use_insn->op == IR_CASE_VAL) { - if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) { - use_case = use; - break; - } - } else if (use_insn->op == IR_CASE_DEFAULT) { - use_case = use; - } - } - if (use_case) { - use_insn = &ctx->ir_base[use_case]; - if (_values[i].optx == IR_TOP) { - _values[i].optx = IR_IF; - _values[i].op1 = use_case; - } else if (_values[i].optx != IR_IF || _values[i].op1 != use_case) { - IR_MAKE_BOTTOM(i); - } - if (!IR_IS_BOTTOM(use_case)) { - ir_bitqueue_add(&worklist, use_case); + ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref); + ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref); + + MAKE_NOP(merge); CLEAR_USES(merge_ref); + MAKE_NOP(phi); CLEAR_USES(phi_ref); + MAKE_NOP(cond); CLEAR_USES(cond_ref); + MAKE_NOP(insn); CLEAR_USES(ref); + + if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); + if_false->op1 = end1_ref; + if_false->op2 = end2_ref; + + if_true->optx = IR_BEGIN; + if_true->op1 = IR_UNUSED; + + ctx->flags2 &= ~IR_CFG_REACHABLE; + + ir_bitqueue_add(worklist, if_false_ref); + + return 1; } - } - if (!IR_IS_BOTTOM(i)) { - continue; - } - } - IR_MAKE_BOTTOM(i); - } else if (ir_is_dead_load_ex(ctx, i, flags, insn)) { - /* dead load */ - _values[i].optx = IR_LOAD; - } else { - IR_MAKE_BOTTOM(i); + } else { + /* IF Split + * + * | | | | + * | END | IF<----+ + * END / END / \ | + * | +---+ | +--+ + | + * | / | / | | + * MERGE | IF_FALSE | | + * | \ | | | | + * | PHI(C1, X) | | | | + * | | | | | | + * | CMP(_, C2) | | | CMP(X, C2) + * | / | | | + * IF => | END | + * | \ | | | + * | +------+ | | | + * | IF_TRUE | | IF_TRUE + * IF_FALSE | MERGE + * | | + */ - /* control, call, load and store instructions may have unprocessed inputs */ - n = IR_INPUT_EDGES_COUNT(flags); - if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) { - for (j = 0; j < (n>>2); j++) { - _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ - } - for (j = 2, p = insn->ops + j; j <= n; j++, p++) { - IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); - use = *p; - if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { - 
ir_bitqueue_add(&worklist, use); + ir_use_list_remove_all(ctx, merge_ref, phi_ref); + ir_use_list_remove_all(ctx, ref, if_true_ref); + if (!IR_IS_CONST_REF(phi->op3)) { + ir_use_list_replace_one(ctx, phi->op3, phi_ref, insn->op2); } - } - } else if (n >= 2) { - IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA); - use = insn->op2; - if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { - ir_bitqueue_add(&worklist, use); - } - if (n > 2) { - IR_ASSERT(n == 3); - IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA); - use = insn->op3; - if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { - ir_bitqueue_add(&worklist, use); + ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref); + ir_use_list_replace_one(ctx, cond_ref, ref, end2_ref); + ir_use_list_add(ctx, end2_ref, if_true_ref); + + end2->optx = IR_OPTX(IR_IF, IR_VOID, 2); + end2->op2 = insn->op2; + + merge->optx = IR_OPTX(op, IR_VOID, 1); + merge->op1 = end2_ref; + merge->op2 = IR_UNUSED; + + cond->op1 = phi->op3; + MAKE_NOP(phi); + CLEAR_USES(phi_ref); + + insn->optx = IR_OPTX(IR_END, IR_VOID, 1); + insn->op1 = merge_ref; + insn->op2 = IR_UNUSED; + + if_true->op1 = end2_ref; + + if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); + if_false->op1 = end1_ref; + if_false->op2 = ref; + + ir_bitqueue_add(worklist, if_false_ref); + if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) { + ir_bitqueue_add(worklist, end2->op1); } + + return 1; } } } } - use_list = &ctx->use_lists[i]; - n = use_list->count; - for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { - use = *p; - if (_values[use].optx != IR_BOTTOM) { - ir_bitqueue_add(&worklist, use); - } - } } -#ifdef IR_DEBUG - if (ctx->flags & IR_DEBUG_SCCP) { - for (i = 1; i < ctx->insns_count; i++) { - if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) { - fprintf(stderr, "%d. CONST(", i); - ir_print_const(ctx, &_values[i], stderr, true); - fprintf(stderr, ")\n"); -#if IR_COMBO_COPY_PROPAGATION - } else if (_values[i].op == IR_COPY) { - fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1); -#endif - } else if (IR_IS_TOP(i)) { - fprintf(stderr, "%d. TOP\n", i); - } else if (_values[i].op == IR_IF) { - fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1); - } else if (_values[i].op == IR_MERGE) { - fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1); - } else if (!IR_IS_BOTTOM(i)) { - fprintf(stderr, "%d. 
%d\n", i, _values[i].op); + return 0; +} + +static void ir_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_bitqueue *worklist) +{ + ir_use_list *use_list = &ctx->use_lists[merge_ref]; + + if (use_list->count == 1) { + ir_try_remove_empty_diamond(ctx, merge_ref, merge, worklist); + } else if (use_list->count == 2) { + if (merge->inputs_count == 2) { + ir_ref phi_ref = ctx->use_edges[use_list->refs]; + ir_insn *phi = &ctx->ir_base[phi_ref]; + + ir_ref next_ref = ctx->use_edges[use_list->refs + 1]; + ir_insn *next = &ctx->ir_base[next_ref]; + + if (next->op == IR_PHI) { + SWAP_REFS(phi_ref, next_ref); + SWAP_INSNS(phi, next); } - } - } -#endif - for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) { - if (value->op == IR_BOTTOM) { - continue; - } else if (IR_IS_CONST_OP(value->op)) { - /* replace instruction by constant */ - j = ir_const(ctx, value->val, value->type); - ir_sccp_replace_insn(ctx, _values, i, j, &worklist2); - } else if (IR_IS_SYM_CONST(value->op)) { - /* replace instruction by constant */ - j = ir_const_ex(ctx, value->val, value->type, value->optx); - ir_sccp_replace_insn(ctx, _values, i, j, &worklist2); -#if IR_COMBO_COPY_PROPAGATION - } else if (value->op == IR_COPY) { - ir_sccp_replace_insn(ctx, _values, i, value->op1, &worklist2); -#endif - } else if (value->op == IR_TOP) { - /* remove unreachable instruction */ - insn = &ctx->ir_base[i]; - if (insn->op == IR_NOP) { - /* already removed */ - } else if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) { - if (insn->op != IR_PARAM && (insn->op != IR_VAR || _values[insn->op1].op == IR_TOP)) { - ir_sccp_remove_insn(ctx, _values, i, &worklist2); - } - } else { - if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) { - /* remove from terminators list */ - ir_ref prev = ctx->ir_base[1].op1; - if (prev == i) { - ctx->ir_base[1].op1 = insn->op3; + if (phi->op == IR_PHI && next->op != IR_PHI) { + if (next->op == IR_IF && next->op1 == merge_ref && ctx->use_lists[phi_ref].count == 1) { + if (next->op2 == phi_ref) { + if (ir_try_split_if(ctx, next_ref, next, worklist)) { + return; + } } else { - while (prev) { - if (ctx->ir_base[prev].op3 == i) { - ctx->ir_base[prev].op3 = insn->op3; - break; + ir_insn *cmp = &ctx->ir_base[next->op2]; + + if (cmp->op >= IR_EQ && cmp->op <= IR_UGT + && cmp->op1 == phi_ref + && IR_IS_CONST_REF(cmp->op2) + && !IR_IS_SYM_CONST(ctx->ir_base[cmp->op2].op) + && ctx->use_lists[next->op2].count == 1) { + if (ir_try_split_if_cmp(ctx, next_ref, next, worklist)) { + return; } - prev = ctx->ir_base[prev].op3; } } } - ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, &worklist2); + ir_optimize_phi(ctx, merge_ref, merge, phi_ref, phi, worklist); } - } else if (value->op == IR_IF) { - /* remove one way IF/SWITCH */ - ir_sccp_remove_if(ctx, _values, i, value->op1); - } else if (value->op == IR_MERGE) { - /* schedule merge to remove unfeasible MERGE inputs */ - ir_bitqueue_add(&worklist, i); - } else if (value->op == IR_LOAD) { - /* schedule dead load elimination */ - ir_bitqueue_add(&worklist2, i); } } +} - while ((i = ir_bitqueue_pop(&worklist)) >= 0) { - IR_ASSERT(_values[i].op == IR_MERGE); - ir_sccp_remove_unfeasible_merge_inputs(ctx, _values, i, _values[i].op1); - } - - ctx->flags2 |= IR_CFG_REACHABLE; +static void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist) +{ + ir_ref i; + ir_insn *insn; - while ((i = ir_bitqueue_pop(&worklist2)) >= 0) { + while ((i = ir_bitqueue_pop(worklist)) >= 0) { insn = &ctx->ir_base[i]; if (IR_IS_FOLDABLE_OP(insn->op)) { if 
(ctx->use_lists[i].count == 0) { if (insn->op == IR_PHI) { - ir_bitqueue_add(&worklist2, insn->op1); + ir_bitqueue_add(worklist, insn->op1); } - ir_sccp_remove_insn2(ctx, i, &worklist2); + ir_iter_remove_insn(ctx, i, worklist); } else { insn = &ctx->ir_base[i]; switch (insn->op) { @@ -2558,14 +2659,14 @@ int ir_sccp(ir_ctx *ctx) if (ir_may_promote_d2f(ctx, insn->op1)) { ir_ref ref = ir_promote_d2f(ctx, insn->op1, i); insn->op1 = ref; - ir_sccp_replace_insn2(ctx, i, ref, &worklist2); + ir_iter_replace_insn(ctx, i, ref, worklist); break; } } else { if (ir_may_promote_f2d(ctx, insn->op1)) { ir_ref ref = ir_promote_f2d(ctx, insn->op1, i); insn->op1 = ref; - ir_sccp_replace_insn2(ctx, i, ref, &worklist2); + ir_iter_replace_insn(ctx, i, ref, worklist); break; } } @@ -2585,13 +2686,13 @@ int ir_sccp(ir_ctx *ctx) if (ir_may_promote_i2i(ctx, insn->type, insn->op1)) { ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i); insn->op1 = ref; - ir_sccp_replace_insn2(ctx, i, ref, &worklist2); + ir_iter_replace_insn(ctx, i, ref, worklist); break; } goto folding; case IR_SEXT: case IR_ZEXT: - if (ir_try_promote_ext(ctx, i, insn, &worklist2)) { + if (ir_try_promote_ext(ctx, i, insn, worklist)) { break; } goto folding; @@ -2599,7 +2700,7 @@ int ir_sccp(ir_ctx *ctx) break; default: folding: - ir_sccp_fold2(ctx, i, &worklist2); + ir_iter_fold(ctx, i, worklist); break; } } @@ -2609,10 +2710,10 @@ int ir_sccp(ir_ctx *ctx) } else if (insn->op == IR_BEGIN) { if (ctx->ir_base[insn->op1].op == IR_END && ctx->use_lists[i].count == 1) { - ir_merge_blocks(ctx, insn->op1, i, &worklist2); + ir_merge_blocks(ctx, insn->op1, i, worklist); } } else if (insn->op == IR_MERGE) { - ir_optimize_merge(ctx, i, insn, &worklist2); + ir_optimize_merge(ctx, i, insn, worklist); } } else if (ir_is_dead_load(ctx, i)) { ir_ref next = ctx->use_edges[ctx->use_lists[i].refs]; @@ -2621,16 +2722,34 @@ int ir_sccp(ir_ctx *ctx) ctx->ir_base[next].op1 = insn->op1; ir_use_list_replace_one(ctx, insn->op1, i, next); insn->op1 = IR_UNUSED; - ir_sccp_remove_insn2(ctx, i, &worklist2); + ir_iter_remove_insn(ctx, i, worklist); } } +} + +int ir_sccp(ir_ctx *ctx) +{ + ir_bitqueue sccp_worklist, iter_worklist; + ir_insn *_values; + + ctx->flags2 |= IR_OPT_IN_SCCP; + ir_bitqueue_init(&iter_worklist, ctx->insns_count); + ir_bitqueue_init(&sccp_worklist, ctx->insns_count); + _values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn)); + + ir_sccp_analyze(ctx, _values, &sccp_worklist, &iter_worklist); + ir_sccp_transform(ctx, _values, &sccp_worklist, &iter_worklist); ir_mem_free(_values); - ir_bitqueue_free(&worklist); - ir_bitqueue_free(&worklist2); + ir_bitqueue_free(&sccp_worklist); + + ctx->flags2 |= IR_CFG_REACHABLE; + + ir_iter_opt(ctx, &iter_worklist); + + ir_bitqueue_free(&iter_worklist); ctx->flags2 &= ~IR_OPT_IN_SCCP; - ctx->flags2 |= IR_SCCP_DONE; return 1; } diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 58c6ed40f7df2..dce15b5be3b89 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -3090,9 +3090,16 @@ static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg base = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - int32_t offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[src].op3); + ir_insn *var_insn; + int32_t offset; IR_ASSERT(ir_rule(ctx, src) == IR_STATIC_ALLOCA); + var_insn = &ctx->ir_base[src]; + if (var_insn->op == IR_VADDR) { + var_insn = &ctx->ir_base[var_insn->op1]; + } + IR_ASSERT(var_insn->op == IR_VAR || var_insn->op == IR_ALLOCA); + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); if (offset == 0) { | mov Ra(reg), Ra(base) } else { @@ -7569,7 +7576,11 @@ static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) mem = ir_var_spill_slot(ctx, insn->op1); fp = IR_MEM_BASE(mem); offset = IR_MEM_OFFSET(mem); - | lea Ra(def_reg), aword [Ra(fp)+offset] + if (offset == 0) { + | mov Ra(def_reg), Ra(fp) + } else { + | lea Ra(def_reg), aword [Ra(fp)+offset] + } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } @@ -10237,10 +10248,15 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); ir_ref *ops = insn->ops; IR_REGSET_EXCL(available, reg); - if (constraints.tmp_regs[n].num > 0 - && IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) { - /* rematerialization */ - reg |= IR_REG_SPILL_LOAD; + if (constraints.tmp_regs[n].num > 0) { + if (IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) { + /* rematerialization */ + reg |= IR_REG_SPILL_LOAD; + } else if (ctx->ir_base[ops[constraints.tmp_regs[n].num]].op == IR_ALLOCA || + ctx->ir_base[ops[constraints.tmp_regs[n].num]].op == IR_VADDR) { + /* local address rematerialization */ + reg |= IR_REG_SPILL_LOAD; + } } ctx->regs[i][constraints.tmp_regs[n].num] = reg; } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) {
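
For reviewers skimming the SCCP rework above: the new ir_may_promote_i2i()/ir_promote_i2i() pair decides whether an integer expression tree can be re-evaluated directly in a narrower type, so that an outer TRUNC (or a ZEXT/SEXT chain) folds away. The toy program below is a minimal sketch of that recursion shape only -- the expr struct, the node names, and the main() example are invented for illustration and are not IR's real node layout or API; the real helper additionally distinguishes ZEXT/SEXT source types, rejects symbolic constants, and rewrites nodes in place via ir_promote_i2i() once the check succeeds.

#include <stdbool.h>
#include <stdio.h>

/* Toy expression tree -- hypothetical, only to mirror the recursion in
 * ir_may_promote_i2i(); not the real IR node layout. */
typedef enum { EX_CONST, EX_EXT, EX_NEG, EX_ADD, EX_MUL } ex_op;

typedef struct expr {
    ex_op        op;
    int          from_type;   /* for EX_EXT: the type it widens from */
    int          use_count;   /* number of consumers of this node */
    struct expr *left, *right;
} expr;

/* Can this whole tree be re-evaluated in `type`, making an outer
 * truncation to `type` a no-op?  Mirrors the three cases in the patch:
 * constants are always fine, an extension is fine when it widens from
 * exactly the target type, and single-use arithmetic is fine when all
 * of its operands are. */
static bool may_promote(const expr *e, int type)
{
    switch (e->op) {
    case EX_CONST:
        return true;                    /* constants can be re-narrowed */
    case EX_EXT:
        return e->from_type == type;    /* ZEXT/SEXT back to its source */
    case EX_NEG:                        /* unary: single use + operand ok */
        return e->use_count == 1 && may_promote(e->left, type);
    case EX_ADD:
    case EX_MUL:                        /* binary: single use + both ok */
        return e->use_count == 1
            && may_promote(e->left, type)
            && may_promote(e->right, type);
    }
    return false;
}

int main(void)
{
    /* trunc32( zext32to64(x:int32) + 1 )  ==>  promotable: the ADD can
     * be done in 32 bits, and both the ZEXT and the TRUNC disappear. */
    expr x   = { EX_EXT,   32, 1, NULL, NULL };
    expr one = { EX_CONST,  0, 1, NULL, NULL };
    expr add = { EX_ADD,    0, 1, &x,   &one };
    printf("promotable to int32: %s\n", may_promote(&add, 32) ? "yes" : "no");
    return 0;
}

The single-use guard is what makes the in-place rewrite safe: promoting a multi-use node would change the type observed by its other consumers, which is why both the unary and binary cases in the patch insist on ctx->use_lists[ref].count == 1 before recursing.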