From 36d46a473217929b38b8a7be805f2cb17a68dd85 Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Wed, 5 Feb 2025 15:58:25 +0100 Subject: [PATCH 1/3] Fix curl_basic_022.phpt for libcurl 8.12.0 Due to a deliberate change in libcurl, the expiration is now capped to at most 400 days. We could solve this by choosing another date roughly a year in the future, but would need to update the test next year. This would be especially annoying for security branches. Another option would be to actually parse the cookie list lines, but that might not be worth the trouble. Instead we just ignore the exact timestamp created by libcurl. [1] Closes GH-17709. --- ext/curl/tests/curl_basic_022.phpt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ext/curl/tests/curl_basic_022.phpt b/ext/curl/tests/curl_basic_022.phpt index e905dfd885d16..4a2177e06bd47 100644 --- a/ext/curl/tests/curl_basic_022.phpt +++ b/ext/curl/tests/curl_basic_022.phpt @@ -11,10 +11,10 @@ curl_setopt($ch, CURLOPT_COOKIELIST, 'Set-Cookie: C2=v2; expires=Thu, 31-Dec-203 var_dump(curl_getinfo($ch, CURLINFO_COOKIELIST)); ?> ---EXPECT-- +--EXPECTF-- array(2) { [0]=> - string(38) ".php.net TRUE / FALSE 2145916799 C1 v1" + string(38) ".php.net TRUE / FALSE %d C1 v1" [1]=> - string(38) ".php.net TRUE / FALSE 2145916799 C2 v2" + string(38) ".php.net TRUE / FALSE %d C2 v2" } From dc7b661a6094c75d80c661e8103acb6899af34e4 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Wed, 5 Feb 2025 12:34:39 +0000 Subject: [PATCH 2/3] Fix GH-17703: imagescale with both width and height set to negative values. A ValueError is now thrown in this particular case. Closes GH-17708. --- NEWS | 4 ++++ UPGRADING | 3 +++ ext/gd/gd.c | 5 +++++ ext/gd/tests/gh17703.phpt | 17 +++++++++++++++++ 4 files changed, 29 insertions(+) create mode 100644 ext/gd/tests/gh17703.phpt diff --git a/NEWS b/NEWS index fb3a050b1bb21..5f7017b6368c0 100644 --- a/NEWS +++ b/NEWS @@ -18,6 +18,10 @@ PHP NEWS . Fixed bug GH-17609 (Typo in error message: Dom\NO_DEFAULT_NS instead of Dom\HTML_NO_DEFAULT_NS). (nielsdos) +- GD: + . Fixed bug GH-17703 (imagescale with both width and height negative values + triggers only an Exception on width). (David Carlier) + - MBString: . Fixed bug GH-17503 (Undefined float conversion in mb_convert_variables). (cmb) diff --git a/UPGRADING b/UPGRADING index 3bab45da53e74..a67fc1b1d88fd 100644 --- a/UPGRADING +++ b/UPGRADING @@ -627,6 +627,9 @@ PHP 8.4 UPGRADE NOTES . DOMDocument::registerNodeClass() now has a tentative return type of true. Previously, the return type was bool but only true could be returned in practice. +- GD: + . imagescale() now throws a ValueError when both the width and height arguments are negative. + - Hash: . Changed the return type of hash_update() to true. It was already the case that only true could be returned, but the stub was not updated yet.
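To make the new imagescale() behavior concrete, here is a minimal sketch of the check that the gd.c hunk below introduces (the image size and variable names are illustrative, not taken from the patch):

    <?php
    $im = imagecreatetruecolor(8, 8);
    try {
        imagescale($im, -1, -1);  /* both dimensions negative */
    } catch (ValueError $e) {
        /* prints: Argument #2 ($width) and argument #3 ($height) cannot be both negative */
        echo $e->getMessage();
    }
    ?>

With only one of the two dimensions negative, imagescale() behaves as before and derives the missing dimension from the other one to preserve the aspect ratio; the ValueError is raised only when both $width and $height are negative.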
diff --git a/ext/gd/gd.c b/ext/gd/gd.c index c81861ab80193..962041232c618 100644 --- a/ext/gd/gd.c +++ b/ext/gd/gd.c @@ -3981,6 +3981,11 @@ PHP_FUNCTION(imagescale) im = php_gd_libgdimageptr_from_zval_p(IM); + if (tmp_h < 0 && tmp_w < 0) { + zend_value_error("Argument #2 ($width) and argument #3 ($height) cannot be both negative"); + RETURN_THROWS(); + } + if (tmp_h < 0 || tmp_w < 0) { /* preserve ratio */ long src_x, src_y; diff --git a/ext/gd/tests/gh17703.phpt b/ext/gd/tests/gh17703.phpt new file mode 100644 index 0000000000000..4677b6a501391 --- /dev/null +++ b/ext/gd/tests/gh17703.phpt @@ -0,0 +1,17 @@ +--TEST-- +GH-17703 imagescale() with both width and height negative triggers a ValueError. +--EXTENSIONS-- +gd +--FILE-- +<?php +$im = imagecreatetruecolor(1, 1); +try { + imagescale($im, -1, -1); +} catch (\ValueError $e) { + echo $e->getMessage(); +} +?> +--EXPECT-- +Argument #2 ($width) and argument #3 ($height) cannot be both negative From c730175be82608daf3dff0a898dd198089662ff6 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Thu, 6 Feb 2025 02:04:03 +0300 Subject: [PATCH 3/3] IR Update IR commit: 36329a0398ff57929c51c52084027730a75b9862 --- ext/opcache/jit/ir/ir_aarch64.dasc | 22 +- ext/opcache/jit/ir/ir_gcm.c | 4 +- ext/opcache/jit/ir/ir_private.h | 4 +- ext/opcache/jit/ir/ir_sccp.c | 2117 +++++++++++++++------- ext/opcache/jit/ir/ir_x86.dasc | 28 +- 5 files changed, 1163 insertions(+), 1012 deletions(-) diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index b96d47461196c..772eea7a5d78a 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -1383,9 +1383,16 @@ static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - int32_t offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[src].op3); + ir_insn *var_insn; + int32_t offset; IR_ASSERT(ir_rule(ctx, src) == IR_STATIC_ALLOCA); + var_insn = &ctx->ir_base[src]; + if (var_insn->op == IR_VADDR) { + var_insn = &ctx->ir_base[var_insn->op1]; + } + IR_ASSERT(var_insn->op == IR_VAR || var_insn->op == IR_ALLOCA); + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); if (aarch64_may_encode_imm12(offset)) { | add Rx(reg), Rx(base), #offset } else { @@ -5680,10 +5687,15 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); ir_ref *ops = insn->ops; IR_REGSET_EXCL(available, reg); - if (constraints.tmp_regs[n].num > 0 - && IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) { - /* rematerialization */ - reg |= IR_REG_SPILL_LOAD; + if (constraints.tmp_regs[n].num > 0) { + if (IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) { + /* rematerialization */ + reg |= IR_REG_SPILL_LOAD; + } else if (ctx->ir_base[ops[constraints.tmp_regs[n].num]].op == IR_ALLOCA || + ctx->ir_base[ops[constraints.tmp_regs[n].num]].op == IR_VADDR) { + /* local address rematerialization */ + reg |= IR_REG_SPILL_LOAD; + } } ctx->regs[i][constraints.tmp_regs[n].num] = reg; } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) { diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c index 12103a174d07c..be8744ef198fd 100644 --- a/ext/opcache/jit/ir/ir_gcm.c +++ b/ext/opcache/jit/ir/ir_gcm.c @@ -1059,7 +1059,7 @@ int ir_schedule(ir_ctx *ctx) if (ctx->flags & IR_DEBUG_SCHEDULE) { fprintf(stderr, "After Schedule\n"); for (i = 1; i != 0; i = _next[i]) { - fprintf(stderr, "%d -> %d\n", i, _blocks[i]); + fprintf(stderr, "%d -> %d (%d)\n", i,
_blocks[i], _xlat[i]); } } #endif @@ -1328,11 +1328,13 @@ int ir_schedule(ir_ctx *ctx) new_ctx.cfg_edges = ctx->cfg_edges; ctx->cfg_blocks = NULL; ctx->cfg_edges = NULL; + ir_code_buffer *saved_code_buffer = ctx->code_buffer; ir_free(ctx); IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit); IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit); memcpy(ctx, &new_ctx, sizeof(ir_ctx)); + ctx->code_buffer = saved_code_buffer; ctx->flags2 |= IR_LINEAR; ir_mem_free(_next); diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index f980b86b89320..e28897e0dc407 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -1013,8 +1013,10 @@ IR_ALWAYS_INLINE uint32_t ir_insn_len(const ir_insn *insn) #define IR_HAS_FP_RET_SLOT (1<<10) #define IR_16B_FRAME_ALIGNMENT (1<<11) +/* Temporary: MEM2SSA -> SCCP */ +#define IR_MEM2SSA_VARS (1<<25) + /* Temporary: SCCP -> CFG */ -#define IR_SCCP_DONE (1<<25) #define IR_CFG_REACHABLE (1<<26) /* Temporary: Dominators -> Loops */ diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index 680e86c508657..996847d58a7e7 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -12,27 +12,48 @@ #include "ir.h" #include "ir_private.h" +#define IR_COMBO_COPY_PROPAGATION 1 + #define IR_TOP IR_UNUSED #define IR_BOTTOM IR_LAST_OP #define IR_MAKE_TOP(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_TOP;} while (0) #define IR_MAKE_BOTTOM(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0) -#define IR_IS_TOP(ref) (ref >= 0 && _values[ref].optx == IR_TOP) -#define IR_IS_BOTTOM(ref) (ref >= 0 && _values[ref].optx == IR_BOTTOM) -#define IR_IS_FEASIBLE(ref) (ref >= 0 && _values[ref].optx != IR_TOP) +#define IR_IS_TOP(ref) (ref >= 0 && _values[ref].op == IR_TOP) +#define IR_IS_BOTTOM(ref) (ref >= 0 && _values[ref].op == IR_BOTTOM) +#define IR_IS_REACHABLE(ref) _ir_is_reachable_ctrl(ctx, _values, ref) +#define IR_IS_CONST(ref) (IR_IS_CONST_REF(ref) || IR_IS_CONST_OP(_values[ref].op)) -#define IR_COMBO_COPY_PROPAGATION 1 +IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(ir_ctx *ctx, ir_insn *_values, ir_ref ref) +{ + IR_ASSERT(!IR_IS_CONST_REF(ref)); + IR_ASSERT(ir_op_flags[ctx->ir_base[ref].op] & IR_OP_FLAG_CONTROL); + return _values[ref].op != IR_TOP; /* BOTTOM, IF or MERGE */ +} #if IR_COMBO_COPY_PROPAGATION -IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_insn *_values, ir_ref a) +IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_ctx *ctx, ir_insn *_values, ir_ref a) { if (a > 0 && _values[a].op == IR_COPY) { - a = _values[a].op1; - IR_ASSERT(a < 0 || _values[a].op != IR_COPY); /* this may be a copy of symbolic constant */ + do { + a = _values[a].op1; + } while (a > 0 && _values[a].op == IR_COPY); + IR_ASSERT(a < 0 || _values[a].op == IR_BOTTOM); + IR_ASSERT(a > 0 || IR_IS_SYM_CONST(ctx->ir_base[a].op)); } return a; } + +static void ir_sccp_add_identity(ir_ctx *ctx, ir_insn *_values, ir_ref src, ir_ref dst, ir_type type) +{ + IR_ASSERT(dst > 0 && _values[dst].op != IR_BOTTOM && _values[dst].op != IR_COPY); + IR_ASSERT((src > 0 && (_values[src].op == IR_BOTTOM || _values[src].op == IR_COPY)) + || (src < 0 && IR_IS_SYM_CONST(ctx->ir_base[src].op))); + IR_ASSERT(ir_sccp_identity(ctx, _values, src) != dst); + _values[dst].optx = IR_OPT(IR_COPY, type); + _values[dst].op1 = src; +} #endif static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3) @@ -40,9 +61,9 @@ static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, 
ir_ref res, uint32_t o ir_insn *op1_insn, *op2_insn, *op3_insn, *insn; #if IR_COMBO_COPY_PROPAGATION - op1 = ir_sccp_identity(_values, op1); - op2 = ir_sccp_identity(_values, op2); - op3 = ir_sccp_identity(_values, op3); + op1 = ir_sccp_identity(ctx, _values, op1); + op2 = ir_sccp_identity(ctx, _values, op2); + op3 = ir_sccp_identity(ctx, _values, op3); #endif restart: @@ -58,33 +79,31 @@ static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t o op3 = ctx->fold_insn.op3; goto restart; case IR_FOLD_DO_EMIT: - IR_MAKE_BOTTOM(res); - return 1; + goto make_bottom; case IR_FOLD_DO_COPY: op1 = ctx->fold_insn.op1; -#if IR_COMBO_COPY_PROPAGATION - op1 = ir_sccp_identity(_values, op1); -#endif insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1; if (IR_IS_CONST_OP(insn->op)) { /* pass */ -#if IR_COMBO_COPY_PROPAGATION - } else if (_values[res].optx == IR_TOP) { - _values[res].optx = IR_OPT(IR_COPY, insn->type); - _values[res].op1 = op1; - return 1; - } else if (_values[res].op == IR_COPY && _values[res].op1 == op1) { - return 0; /* not changed */ } else { - IR_ASSERT(_values[res].optx != IR_BOTTOM); - /* we don't check for widening */ - _values[res].optx = IR_OPT(IR_COPY, insn->type); - _values[res].op1 = op1; +#if IR_COMBO_COPY_PROPAGATION + if (_values[res].op == IR_TOP) { + /* pass to new copy */ + } else if (_values[res].op == IR_COPY) { + if (ir_sccp_identity(ctx, _values, _values[res].op1) == ir_sccp_identity(ctx, _values, op1)) { + return 0; /* not changed */ + } else { + goto make_bottom; + } + } else { + IR_ASSERT(_values[res].op != IR_BOTTOM); + /* we don't check for widening */ + } + /* create new COPY */ + ir_sccp_add_identity(ctx, _values, op1, res, insn->type); return 1; #else - } else { - IR_MAKE_BOTTOM(res); - return 1; + goto make_bottom; #endif } break; @@ -100,11 +119,13 @@ static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t o _values[res].optx = IR_OPT(insn->type, insn->type); _values[res].val.u64 = insn->val.u64; return 1; - } else if (_values[res].opt != IR_OPT(insn->type, insn->type) || _values[res].val.u64 != insn->val.u64) { - IR_MAKE_BOTTOM(res); - return 1; + } else if (_values[res].opt == IR_OPT(insn->type, insn->type) && _values[res].val.u64 == insn->val.u64) { + return 0; /* not changed */ } - return 0; /* not changed */ + +make_bottom: + IR_MAKE_BOTTOM(res); + return 1; } static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *insn, ir_bitqueue *worklist) @@ -112,14 +133,16 @@ static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i ir_ref j, n, input, *merge_input, *p; ir_insn *v, *new_const = NULL; #if IR_COMBO_COPY_PROPAGATION - ir_ref new_copy; + ir_ref new_copy = IR_UNUSED; + ir_ref new_copy_identity = IR_UNUSED; + ir_ref phi_identity = ir_sccp_identity(ctx, _values, i); #endif - if (!IR_IS_FEASIBLE(insn->op1)) { + if (!IR_IS_REACHABLE(insn->op1)) { return 0; } n = insn->inputs_count; - if (n > 3 && _values[i].optx == IR_TOP) { + if (n > 3 && _values[i].op == IR_TOP) { for (j = 0; j < (n>>2); j++) { _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ } @@ -129,7 +152,7 @@ static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i merge_input = ctx->ir_base[insn->op1].ops + 1; for (; --n > 0; p++, merge_input++) { IR_ASSERT(*merge_input > 0); - if (_values[*merge_input].optx == IR_TOP) { + if (!IR_IS_REACHABLE(*merge_input)) { continue; } @@ -150,25 +173,30 @@ static bool 
ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i #if IR_COMBO_COPY_PROPAGATION } else if (v->op == IR_COPY) { input = v->op1; - IR_ASSERT(input < 0 || _values[input].op != IR_COPY); + new_copy_identity = ir_sccp_identity(ctx, _values, input); + if (new_copy_identity == phi_identity) { + new_copy_identity = IR_UNUSED; + continue; + } new_copy = input; goto next; +#endif } else if (v->op == IR_BOTTOM) { - new_copy = input; +#if IR_COMBO_COPY_PROPAGATION + if (input == phi_identity) { + continue; + } + new_copy = new_copy_identity = input; goto next; #else - } else if (v->op == IR_BOTTOM) { - IR_MAKE_BOTTOM(i); - return 1; + goto make_bottom; #endif } } - new_copy = IR_UNUSED; new_const = v; goto next; } - IR_ASSERT(_values[i].optx == IR_TOP); return 0; next: @@ -177,12 +205,17 @@ static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i /* for all live merge inputs */ for (; --n > 0; p++, merge_input++) { IR_ASSERT(*merge_input > 0); - if (_values[*merge_input].optx == IR_TOP) { + if (!IR_IS_REACHABLE(*merge_input)) { continue; } input = *p; if (IR_IS_CONST_REF(input)) { +#if IR_COMBO_COPY_PROPAGATION + if (new_copy) { + goto make_bottom; + } +#endif v = &ctx->ir_base[input]; } else if (input == i) { continue; @@ -197,58 +230,55 @@ static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i continue; #if IR_COMBO_COPY_PROPAGATION } else if (v->op == IR_COPY) { - input = v->op1; - IR_ASSERT(input < 0 || _values[input].op != IR_COPY); - if (new_copy == input) { + ir_ref identity = ir_sccp_identity(ctx, _values, v->op1); + + if (identity == phi_identity || identity == new_copy_identity) { continue; - } else { - IR_MAKE_BOTTOM(i); - return 1; } + goto make_bottom; +#endif } else if (v->op == IR_BOTTOM) { - if (new_copy == input) { +#if IR_COMBO_COPY_PROPAGATION + if (input == phi_identity || input == new_copy_identity) { continue; - } else { - IR_MAKE_BOTTOM(i); - return 1; } -#else - } else if (v->op == IR_BOTTOM) { - IR_MAKE_BOTTOM(i); - return 1; #endif + goto make_bottom; } } if (!new_const || new_const->opt != v->opt || new_const->val.u64 != v->val.u64) { - IR_MAKE_BOTTOM(i); - return 1; + goto make_bottom; } } #if IR_COMBO_COPY_PROPAGATION if (new_copy) { - if (_values[i].op == IR_COPY && _values[i].op1 == new_copy) { - return 0; /* not changed */ + if (_values[i].op == IR_COPY) { + if (phi_identity == new_copy_identity) { + return 0; /* not changed */ + } else { + goto make_bottom; + } } else { - IR_ASSERT(_values[i].optx != IR_BOTTOM); + IR_ASSERT(_values[i].op != IR_BOTTOM); /* we don't check for widening */ - _values[i].optx = IR_OPT(IR_COPY, ctx->ir_base[new_copy].type); - _values[i].op1 = new_copy; + ir_sccp_add_identity(ctx, _values, new_copy, i, insn->type); return 1; } } #endif - if (_values[i].optx == IR_TOP) { + if (_values[i].op == IR_TOP) { _values[i].optx = new_const->opt; _values[i].val.u64 = new_const->val.u64; return 1; } else if (_values[i].opt == new_const->opt && _values[i].val.u64 == new_const->val.u64) { return 0; - } else { - IR_MAKE_BOTTOM(i); - return 1; } + +make_bottom: + IR_MAKE_BOTTOM(i); + return 1; } static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn *insn) @@ -285,25 +315,6 @@ static bool ir_is_dead(ir_ctx *ctx, ir_ref ref) return 0; } -static ir_ref ir_find1(ir_ctx *ctx, uint32_t optx, ir_ref op1) -{ - IR_ASSERT(!IR_IS_CONST_REF(op1)); - - ir_use_list *use_list = &ctx->use_lists[op1]; - ir_ref *p, n = use_list->count; - - for (p = ctx->use_edges + use_list->refs; n 
> 0; p++, n--) { - ir_ref use = *p; - ir_insn *use_insn = &ctx->ir_base[use]; - - if (use_insn->optx == optx) { - IR_ASSERT(use_insn->op1 == op1); - return use; - } - } - return IR_UNUSED; -} - static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a) { ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a]; @@ -321,154 +332,363 @@ static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b) return v1->val.u64 == v2->val.u64; } -static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref) +#ifdef IR_SCCP_TRACE +static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i) { - ir_ref j, n, *p; - ir_insn *insn; - - CLEAR_USES(ref); - insn = &ctx->ir_base[ref]; - n = insn->inputs_count; - insn->opt = IR_NOP; /* keep "inputs_count" */ - for (j = 1, p = insn->ops + j; j <= n; j++, p++) { - *p = IR_UNUSED; + if (IR_IS_BOTTOM(i)) { + fprintf(stderr, "BOTTOM"); + } else if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) { + fprintf(stderr, "CONST("); + ir_print_const(ctx, &_values[i], stderr, true); + fprintf(stderr, ")"); +#if IR_COMBO_COPY_PROPAGATION + } else if (_values[i].op == IR_COPY) { + fprintf(stderr, "COPY(%d)", _values[i].op1); +#endif + } else if (IR_IS_TOP(i)) { + fprintf(stderr, "TOP"); + } else if (_values[i].op == IR_IF) { + fprintf(stderr, "IF(%d)", _values[i].op1); + } else if (_values[i].op == IR_MERGE) { + fprintf(stderr, "MERGE(%d)", _values[i].op1); + } else { + fprintf(stderr, "%d", _values[i].op); } } -static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist) +static void ir_sccp_trace_start(ir_ctx *ctx, ir_insn *_values, ir_ref i) { - ir_ref j, n, *p; - ir_insn *insn; - - CLEAR_USES(ref); - insn = &ctx->ir_base[ref]; - n = insn->inputs_count; - insn->opt = IR_NOP; /* keep "inputs_count" */ - for (j = 1, p = insn->ops + j; j <= n; j++, p++) { - ir_ref input = *p; - *p = IR_UNUSED; - /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ - if (input > 0 && _values[input].op > IR_COPY) { - ir_use_list_remove_all(ctx, input, ref); - if (ir_is_dead(ctx, input)) { - /* schedule DCE */ - ir_bitqueue_add(worklist, input); - } - } - } + fprintf(stderr, "%d. 
", i); + ir_sccp_trace_val(ctx, _values, i); } -static void ir_sccp_remove_insn2(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) +static void ir_sccp_trace_end(ir_ctx *ctx, ir_insn *_values, ir_ref i) { - ir_ref j, n, *p; - ir_insn *insn; - - CLEAR_USES(ref); - insn = &ctx->ir_base[ref]; - n = insn->inputs_count; - insn->opt = IR_NOP; /* keep "inputs_count" */ - for (j = 1, p = insn->ops + j; j <= n; j++, p++) { - ir_ref input = *p; - *p = IR_UNUSED; - if (input > 0) { - ir_use_list_remove_all(ctx, input, ref); - if (ir_is_dead(ctx, input)) { - /* schedule DCE */ - ir_bitqueue_add(worklist, input); - } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { - /* try to optimize PHI into ABS/MIN/MAX/COND */ - ir_bitqueue_add(worklist, ctx->ir_base[input].op1); - } - } - } + fprintf(stderr, " -> "); + ir_sccp_trace_val(ctx, _values, i); + fprintf(stderr, "\n"); } +#else +# define ir_sccp_trace_start(c, v, i) +# define ir_sccp_trace_end(c, v, i) +#endif -static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) +static void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist) { - ir_ref j, n, *p, use, i; - ir_insn *insn; + ir_ref i, j, n, *p, use; ir_use_list *use_list; + ir_insn *insn, *use_insn; + uint32_t flags; - IR_ASSERT(ref != new_ref); + /* A bit modified SCCP algorithm of M. N. Wegman and F. K. Zadeck */ + worklist->pos = 0; + ir_bitset_incl(worklist->set, 1); + for (; (i = ir_bitqueue_pop(worklist)) >= 0; ir_sccp_trace_end(ctx, _values, i)) { + IR_ASSERT(_values[i].op != IR_BOTTOM); + ir_sccp_trace_start(ctx, _values, i); + insn = &ctx->ir_base[i]; + flags = ir_op_flags[insn->op]; + if (flags & IR_OP_FLAG_DATA) { + if (ctx->use_lists[i].count == 0) { + /* dead code */ + continue; + } else if (insn->op == IR_PHI) { + if (!ir_sccp_meet_phi(ctx, _values, i, insn, worklist)) { + continue; + } + } else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) { + bool may_benefit = 0; + bool has_top = 0; - insn = &ctx->ir_base[ref]; - n = insn->inputs_count; - insn->opt = IR_NOP; /* keep "inputs_count" */ - for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { - ir_ref input = *p; - *p = IR_UNUSED; - /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ - if (input > 0 && _values[input].op > IR_COPY) { - ir_use_list_remove_all(ctx, input, ref); - if (ir_is_dead(ctx, input)) { - /* schedule DCE */ - ir_bitqueue_add(worklist, input); - } - } - } + if ((ctx->flags2 & IR_MEM2SSA_VARS) || _values[i].op != IR_TOP) { + may_benefit = 1; + } - use_list = &ctx->use_lists[ref]; - n = use_list->count; - p = &ctx->use_edges[use_list->refs]; - if (new_ref <= 0) { - /* constant or IR_UNUSED */ - for (; n; p++, n--) { - use = *p; - /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ - if (_values[use].op > IR_COPY) { - insn = &ctx->ir_base[use]; - i = ir_insn_find_op(insn, ref); - if (!i) continue; - IR_ASSERT(i > 0); - ir_insn_set_op(insn, i, new_ref); - /* schedule folding */ - ir_bitqueue_add(worklist, use); - } - } - } else { - for (j = 0; j < n; j++, p++) { - use = *p; - /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ - if (_values[use].optx == IR_BOTTOM) { - insn = &ctx->ir_base[use]; - i = ir_insn_find_op(insn, ref); - IR_ASSERT(i > 0); - ir_insn_set_op(insn, i, new_ref); - if (ir_use_list_add(ctx, new_ref, use)) { - /* restore after reallocation */ - use_list = &ctx->use_lists[ref]; - n = 
use_list->count; - p = &ctx->use_edges[use_list->refs + j]; + IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags)); + n = IR_INPUT_EDGES_COUNT(flags); + for (p = insn->ops + 1; n > 0; p++, n--) { + ir_ref input = *p; + if (input > 0) { + if (_values[input].op == IR_TOP) { + has_top = 1; + /* do backward propagaton only once */ + if (!_values[input].op1) { + _values[input].op1 = 1; + ir_bitqueue_add(worklist, input); + } + } else if (_values[input].op != IR_BOTTOM) { + /* Perform folding only if some of direct inputs + * is going to be replaced by a constant or copy. + * This approach may miss some folding optimizations + * dependent on indirect inputs. e.g. reassociation. + */ + may_benefit = 1; + } + } } - /* schedule folding */ - ir_bitqueue_add(worklist, use); + if (has_top) { + continue; + } + if (!may_benefit) { + IR_MAKE_BOTTOM(i); + if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC + || insn->op == IR_ZEXT || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) { + ir_bitqueue_add(iter_worklist, i); + } + } else if (!ir_sccp_fold(ctx, _values, i, insn->opt, insn->op1, insn->op2, insn->op3)) { + /* not changed */ + continue; + } else if (_values[i].op == IR_BOTTOM) { + insn = &ctx->ir_base[i]; + if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC + || insn->op == IR_ZEXT || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) { + ir_bitqueue_add(iter_worklist, i); + } + } + } else { + IR_MAKE_BOTTOM(i); } - } - } - CLEAR_USES(ref); -} - -static void ir_sccp_replace_insn2(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) -{ - ir_ref i, j, n, *p, use; - ir_insn *insn; - ir_use_list *use_list; - - IR_ASSERT(ref != new_ref); + } else if (flags & IR_OP_FLAG_BB_START) { + if (insn->op == IR_MERGE || insn->op == IR_BEGIN) { + ir_bitqueue_add(iter_worklist, i); + } + if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { + ir_ref unfeasible_inputs = 0; - insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + if (n > 3 && _values[i].op == IR_TOP) { + for (j = 0; j < (n>>2); j++) { + _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ + } + } + for (p = insn->ops + 1; n > 0; p++, n--) { + ir_ref input = *p; + IR_ASSERT(input > 0); + if (!IR_IS_REACHABLE(input)) { + unfeasible_inputs++; + } + } + if (unfeasible_inputs == 0) { + IR_MAKE_BOTTOM(i); + } else if (_values[i].op != IR_MERGE || _values[i].op1 != unfeasible_inputs) { + _values[i].optx = IR_MERGE; + _values[i].op1 = unfeasible_inputs; + } else { + continue; + } + } else { + IR_ASSERT(insn->op == IR_START || IR_IS_REACHABLE(insn->op1)); + IR_MAKE_BOTTOM(i); + } + } else { + IR_ASSERT(insn->op1 > 0); + if (!IR_IS_REACHABLE(insn->op1)) { + /* control inpt is not feasible */ + continue; + } + if (insn->op == IR_IF) { + if (IR_IS_TOP(insn->op2)) { + /* do backward propagaton only once */ + if (!_values[insn->op2].op1) { + _values[insn->op2].op1 = 1; + ir_bitqueue_add(worklist, insn->op2); + } + continue; + } + if (IR_IS_CONST(insn->op2)) { + bool b = ir_sccp_is_true(ctx, _values, insn->op2); + use_list = &ctx->use_lists[i]; + IR_ASSERT(use_list->count == 2); + p = &ctx->use_edges[use_list->refs]; + use = *p; + use_insn = &ctx->ir_base[use]; + IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE); + if ((use_insn->op == IR_IF_TRUE) != b) { + use = *(p+1); + IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE); + } + if (_values[i].op == IR_TOP) { + _values[i].optx = IR_IF; + _values[i].op1 = use; 
+ ir_bitqueue_add(worklist, use); + continue; + } else if (_values[i].op == IR_IF && _values[i].op1 == use) { + continue; + } + } + IR_MAKE_BOTTOM(i); + } else if (insn->op == IR_SWITCH) { + if (IR_IS_TOP(insn->op2)) { + /* do backward propagaton only once */ + if (!_values[insn->op2].op1) { + _values[insn->op2].op1 = 1; + ir_bitqueue_add(worklist, insn->op2); + } + continue; + } + if (IR_IS_CONST(insn->op2)) { + ir_ref use_case = IR_UNUSED; + + use_list = &ctx->use_lists[i]; + n = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + IR_ASSERT(use > 0); + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_CASE_VAL) { + if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) { + use_case = use; + break; + } + } else if (use_insn->op == IR_CASE_DEFAULT) { + use_case = use; + } + } + if (use_case) { + use_insn = &ctx->ir_base[use_case]; + if (_values[i].op == IR_TOP) { + _values[i].optx = IR_IF; + _values[i].op1 = use_case; + ir_bitqueue_add(worklist, use_case); + continue; + } else if (_values[i].op == IR_IF || _values[i].op1 == use_case) { + continue; + } + } + } + IR_MAKE_BOTTOM(i); + } else if (ir_is_dead_load_ex(ctx, i, flags, insn)) { + /* schedule dead load elimination */ + ir_bitqueue_add(iter_worklist, i); + IR_MAKE_BOTTOM(i); + } else { + IR_MAKE_BOTTOM(i); + + /* control, call, load and store instructions may have unprocessed inputs */ + n = IR_INPUT_EDGES_COUNT(flags); + if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) { + for (j = 0; j < (n>>2); j++) { + _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ + } + for (j = 2, p = insn->ops + j; j <= n; j++, p++) { + IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); + use = *p; + if (use > 0 && UNEXPECTED(_values[use].op == IR_TOP)) { + ir_bitqueue_add(worklist, use); + } + } + } else if (n >= 2) { + IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA); + use = insn->op2; + if (use > 0 && UNEXPECTED(_values[use].op == IR_TOP)) { + ir_bitqueue_add(worklist, use); + } + if (n > 2) { + IR_ASSERT(n == 3); + IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA); + use = insn->op3; + if (use > 0 && UNEXPECTED(_values[use].op == IR_TOP)) { + ir_bitqueue_add(worklist, use); + } + } + } + } + } + use_list = &ctx->use_lists[i]; + n = use_list->count; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + use = *p; + if (_values[use].op != IR_BOTTOM) { + ir_bitqueue_add(worklist, use); + } + } + } + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_SCCP) { + for (i = 1; i < ctx->insns_count; i++) { + if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) { + fprintf(stderr, "%d. CONST(", i); + ir_print_const(ctx, &_values[i], stderr, true); + fprintf(stderr, ")\n"); +#if IR_COMBO_COPY_PROPAGATION + } else if (_values[i].op == IR_COPY) { + fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1); +#endif + } else if (IR_IS_TOP(i)) { + fprintf(stderr, "%d. TOP\n", i); + } else if (_values[i].op == IR_IF) { + fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1); + } else if (_values[i].op == IR_MERGE) { + fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1); + } else if (!IR_IS_BOTTOM(i)) { + fprintf(stderr, "%d. 
%d\n", i, _values[i].op); + } + } + } +#endif +} + +/**********************/ +/* SCCP trasformation */ +/**********************/ + +static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref) +{ + ir_ref j, n, *p; + ir_insn *insn; + + CLEAR_USES(ref); + insn = &ctx->ir_base[ref]; n = insn->inputs_count; insn->opt = IR_NOP; /* keep "inputs_count" */ - for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + for (j = 1, p = insn->ops + j; j <= n; j++, p++) { + *p = IR_UNUSED; + } +} + +static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist) +{ + ir_ref j, n, *p; + ir_insn *insn; + + CLEAR_USES(ref); + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + j; j <= n; j++, p++) { ir_ref input = *p; *p = IR_UNUSED; - if (input > 0) { + /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ + if (input > 0 && _values[input].op > IR_COPY) { ir_use_list_remove_all(ctx, input, ref); if (ir_is_dead(ctx, input)) { /* schedule DCE */ ir_bitqueue_add(worklist, input); - } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { - /* try to optimize PHI into ABS/MIN/MAX/COND */ + } + } + } +} + +static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) +{ + ir_ref j, n, *p, use, i; + ir_insn *insn; + ir_use_list *use_list; + + IR_ASSERT(ref != new_ref); + + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ir_ref input = *p; + *p = IR_UNUSED; + /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ + if (input > 0 && _values[input].op > IR_COPY) { + ir_use_list_remove_all(ctx, input, ref); + if (ir_is_dead(ctx, input)) { + /* schedule DCE */ ir_bitqueue_add(worklist, input); } } @@ -481,126 +701,44 @@ static void ir_sccp_replace_insn2(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bi /* constant or IR_UNUSED */ for (; n; p++, n--) { use = *p; - IR_ASSERT(use != ref); - insn = &ctx->ir_base[use]; - i = ir_insn_find_op(insn, ref); - IR_ASSERT(i > 0); - ir_insn_set_op(insn, i, new_ref); - /* schedule folding */ - ir_bitqueue_add(worklist, use); + /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ + if (_values[use].op > IR_COPY) { + insn = &ctx->ir_base[use]; + i = ir_insn_find_op(insn, ref); + if (!i) continue; + IR_ASSERT(i > 0); + ir_insn_set_op(insn, i, new_ref); + /* schedule folding */ + ir_bitqueue_add(worklist, use); + } } } else { for (j = 0; j < n; j++, p++) { use = *p; - IR_ASSERT(use != ref); - insn = &ctx->ir_base[use]; - i = ir_insn_find_op(insn, ref); - IR_ASSERT(i > 0); - ir_insn_set_op(insn, i, new_ref); - if (ir_use_list_add(ctx, new_ref, use)) { - /* restore after reallocation */ - use_list = &ctx->use_lists[ref]; - n = use_list->count; - p = &ctx->use_edges[use_list->refs + j]; + /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ + if (_values[use].op == IR_BOTTOM) { + insn = &ctx->ir_base[use]; + i = ir_insn_find_op(insn, ref); + IR_ASSERT(i > 0); + ir_insn_set_op(insn, i, new_ref); + if (ir_use_list_add(ctx, new_ref, use)) { + /* restore after reallocation */ + use_list = &ctx->use_lists[ref]; + n = use_list->count; + p = &ctx->use_edges[use_list->refs + j]; + } + /* schedule folding */ + ir_bitqueue_add(worklist, use); } - /* schedule folding */ - ir_bitqueue_add(worklist, use); 
} } CLEAR_USES(ref); } -static void ir_sccp_fold2(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) +static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst) { - uint32_t opt; - ir_ref op1, op2, op3; - ir_insn *op1_insn, *op2_insn, *op3_insn, *insn; - - insn = &ctx->ir_base[ref]; - opt = insn->opt; - op1 = insn->op1; - op2 = insn->op2; - op3 = insn->op3; - -restart: - op1_insn = ctx->ir_base + op1; - op2_insn = ctx->ir_base + op2; - op3_insn = ctx->ir_base + op3; - - switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) { - case IR_FOLD_DO_RESTART: - opt = ctx->fold_insn.optx; - op1 = ctx->fold_insn.op1; - op2 = ctx->fold_insn.op2; - op3 = ctx->fold_insn.op3; - goto restart; - case IR_FOLD_DO_EMIT: - insn = &ctx->ir_base[ref]; - if (insn->opt != ctx->fold_insn.opt - || insn->op1 != ctx->fold_insn.op1 - || insn->op2 != ctx->fold_insn.op2 - || insn->op3 != ctx->fold_insn.op3) { - - ir_use_list *use_list; - ir_ref n, j, *p, use; - - insn->optx = ctx->fold_insn.opt; - IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK])); - insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]); - if (insn->op1 != ctx->fold_insn.op1) { - if (insn->op1 > 0) { - ir_use_list_remove_one(ctx, insn->op1, ref); - } - if (ctx->fold_insn.op1 > 0) { - ir_use_list_add(ctx, ctx->fold_insn.op1, ref); - } - } - if (insn->op2 != ctx->fold_insn.op2) { - if (insn->op2 > 0) { - ir_use_list_remove_one(ctx, insn->op2, ref); - } - if (ctx->fold_insn.op2 > 0) { - ir_use_list_add(ctx, ctx->fold_insn.op2, ref); - } - } - if (insn->op3 != ctx->fold_insn.op3) { - if (insn->op3 > 0) { - ir_use_list_remove_one(ctx, insn->op3, ref); - } - if (ctx->fold_insn.op3 > 0) { - ir_use_list_add(ctx, ctx->fold_insn.op3, ref); - } - } - insn->op1 = ctx->fold_insn.op1; - insn->op2 = ctx->fold_insn.op2; - insn->op3 = ctx->fold_insn.op3; - - use_list = &ctx->use_lists[ref]; - n = use_list->count; - for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { - use = *p; - ir_bitqueue_add(worklist, use); - } - } - break; - case IR_FOLD_DO_COPY: - op1 = ctx->fold_insn.op1; - ir_sccp_replace_insn2(ctx, ref, op1, worklist); - break; - case IR_FOLD_DO_CONST: - op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type); - ir_sccp_replace_insn2(ctx, ref, op1, worklist); - break; - default: - IR_ASSERT(0); - break; - } -} - -static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst) -{ - ir_ref next; - ir_insn *insn, *next_insn; + ir_ref next; + ir_insn *insn, *next_insn; insn = &ctx->ir_base[ref]; if (ctx->use_lists[dst].count == 1) { @@ -634,44 +772,71 @@ static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values /* remove MERGE completely */ for (j = 1; j <= n; j++) { ir_ref input = ir_insn_op(insn, j); - if (input && IR_IS_FEASIBLE(input)) { + if (input && IR_IS_REACHABLE(input)) { ir_insn *input_insn = &ctx->ir_base[input]; IR_ASSERT(input_insn->op == IR_END || input_insn->op == IR_LOOP_END|| input_insn->op == IR_IJMP || input_insn->op == IR_UNREACHABLE); if (input_insn->op == IR_END || input_insn->op == IR_LOOP_END) { - if (input < ref) { - ir_ref prev, next = IR_UNUSED; - ir_insn *next_insn = NULL; - - prev = input_insn->op1; - use_list = &ctx->use_lists[ref]; - if (use_list->count == 1) { - next = ctx->use_edges[use_list->refs]; - next_insn = &ctx->ir_base[next]; - } else { - for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) { - use = *p; - use_insn = &ctx->ir_base[use]; - 
IR_ASSERT((use_insn->op != IR_PHI) && "PHI must be already removed"); - if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) { - IR_ASSERT(!next); - next = use; - next_insn = use_insn; - } else if (use_insn->op != IR_NOP) { - IR_ASSERT(use_insn->op1 == ref); - IR_ASSERT(use_insn->op == IR_VAR); - ir_ref region = prev; - while (!IR_IS_BB_START(ctx->ir_base[region].op)) { - region = ctx->ir_base[region].op1; + ir_ref prev, next = IR_UNUSED; + ir_insn *next_insn = NULL; + + prev = input_insn->op1; + use_list = &ctx->use_lists[ref]; + if (use_list->count == 1) { + next = ctx->use_edges[use_list->refs]; + next_insn = &ctx->ir_base[next]; + } else { + k = 0; + p = &ctx->use_edges[use_list->refs]; + while (k < use_list->count) { + use = *p; + use_insn = &ctx->ir_base[use]; +#if IR_COMBO_COPY_PROPAGATION + IR_ASSERT((use_insn->op != IR_PHI) && "PHI must be already removed"); +#else + if (use_insn->op == IR_PHI) { + /* Convert PHI into COPY */ + ir_ref i, n = use_insn->inputs_count; + + for (i = 2; i <= n; i++) { + if (i != j + 1) { + ir_ref from = ir_insn_op(use_insn, i); + if (from > 0) { + ir_use_list_remove_one(ctx, from, use); + } + ir_insn_set_op(use_insn, i, IR_UNUSED); } - use_insn->op1 = region; - ir_use_list_add(ctx, region, use); - p = &ctx->use_edges[use_list->refs + k]; } + use_insn->optx = IR_OPTX(IR_COPY, use_insn->type, 1); + use_insn->op1 = ir_insn_op(use_insn, j + 1); + ir_insn_set_op(use_insn, j + 1, IR_UNUSED); + ir_use_list_remove_one(ctx, ref, use); + p = &ctx->use_edges[use_list->refs + k]; + continue; + } +#endif + if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) { + IR_ASSERT(!next); + next = use; + next_insn = use_insn; + } else if (use_insn->op != IR_NOP) { + IR_ASSERT(use_insn->op1 == ref); + IR_ASSERT(use_insn->op == IR_VAR); + ir_ref region = prev; + while (!IR_IS_BB_START(ctx->ir_base[region].op)) { + region = ctx->ir_base[region].op1; + } + use_insn->op1 = region; + ir_use_list_add(ctx, region, use); + p = &ctx->use_edges[use_list->refs + k]; } + k++; + p++; } - IR_ASSERT(prev && next); + } + IR_ASSERT(prev && next); + if (prev < next) { /* remove MERGE and input END from double linked control list */ next_insn->op1 = prev; ir_use_list_replace_one(ctx, prev, input, next); @@ -752,236 +917,325 @@ static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values } } -static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref) +static void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist) { - ir_insn *insn = &ctx->ir_base[ref]; + ir_ref i, j; + ir_insn *value; - IR_ASSERT(insn->type == IR_DOUBLE); - if (IR_IS_CONST_REF(ref)) { - return !IR_IS_SYM_CONST(insn->op) && insn->val.d == (double)(float)insn->val.d; - } else { - switch (insn->op) { - case IR_FP2FP: - return 1; -// case IR_INT2FP: -// return ctx->use_lists[ref].count == 1; - case IR_NEG: - case IR_ABS: - return ctx->use_lists[ref].count == 1 && - ir_may_promote_d2f(ctx, insn->op1); - case IR_ADD: - case IR_SUB: - case IR_MUL: - case IR_DIV: - case IR_MIN: - case IR_MAX: - return ctx->use_lists[ref].count == 1 && - ir_may_promote_d2f(ctx, insn->op1) && - ir_may_promote_d2f(ctx, insn->op2); - default: - break; + for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) { + if (value->op == IR_BOTTOM) { + continue; + } else if (IR_IS_CONST_OP(value->op)) { + /* replace instruction by constant */ + j = ir_const(ctx, value->val, value->type); + ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist); + } else if (IR_IS_SYM_CONST(value->op)) { + 
/* replace instruction by constant */ + j = ir_const_ex(ctx, value->val, value->type, value->optx); + ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist); +#if IR_COMBO_COPY_PROPAGATION + } else if (value->op == IR_COPY) { + ir_sccp_replace_insn(ctx, _values, i, value->op1, iter_worklist); +#endif + } else if (value->op == IR_TOP) { + /* remove unreachable instruction */ + ir_insn *insn = &ctx->ir_base[i]; + + if (insn->op == IR_NOP) { + /* already removed */ + } else if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) { + if (insn->op != IR_PARAM && (insn->op != IR_VAR || _values[insn->op1].op == IR_TOP)) { + ir_sccp_remove_insn(ctx, _values, i, iter_worklist); + } + } else { + if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) { + /* remove from terminators list */ + ir_ref prev = ctx->ir_base[1].op1; + if (prev == i) { + ctx->ir_base[1].op1 = insn->op3; + } else { + while (prev) { + if (ctx->ir_base[prev].op3 == i) { + ctx->ir_base[prev].op3 = insn->op3; + break; + } + prev = ctx->ir_base[prev].op3; + } + } + } + ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, iter_worklist); + } + } else if (value->op == IR_IF) { + /* remove one way IF/SWITCH */ + ir_sccp_remove_if(ctx, _values, i, value->op1); + } else if (value->op == IR_MERGE) { + /* schedule merge to remove unfeasible MERGE inputs */ + ir_bitqueue_add(worklist, i); } } - return 0; + + while ((i = ir_bitqueue_pop(worklist)) >= 0) { + IR_ASSERT(_values[i].op == IR_MERGE); + ir_sccp_remove_unfeasible_merge_inputs(ctx, _values, i, _values[i].op1); + } } -static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref) +/***************************/ +/* Iterative Optimizations */ +/***************************/ + +static ir_ref ir_find1(ir_ctx *ctx, uint32_t optx, ir_ref op1) { - ir_insn *insn = &ctx->ir_base[ref]; + IR_ASSERT(!IR_IS_CONST_REF(op1)); - IR_ASSERT(insn->type == IR_FLOAT); - if (IR_IS_CONST_REF(ref)) { - return !IR_IS_SYM_CONST(insn->op) && insn->val.f == (float)(double)insn->val.f; - } else { - switch (insn->op) { - case IR_FP2FP: - return 1; - case IR_INT2FP: - return ctx->use_lists[ref].count == 1; - case IR_NEG: - case IR_ABS: - return ctx->use_lists[ref].count == 1 && - ir_may_promote_f2d(ctx, insn->op1); - case IR_ADD: - case IR_SUB: - case IR_MUL: -// case IR_DIV: - case IR_MIN: - case IR_MAX: - return ctx->use_lists[ref].count == 1 && - ir_may_promote_f2d(ctx, insn->op1) && - ir_may_promote_f2d(ctx, insn->op2); - default: - break; + ir_use_list *use_list = &ctx->use_lists[op1]; + ir_ref *p, n = use_list->count; + + for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) { + ir_ref use = *p; + ir_insn *use_insn = &ctx->ir_base[use]; + + if (use_insn->optx == optx) { + IR_ASSERT(use_insn->op1 == op1); + return use; } } - return 0; + return IR_UNUSED; } -static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use) +static void ir_iter_remove_insn(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) { - ir_insn *insn = &ctx->ir_base[ref]; - uint32_t count; + ir_ref j, n, *p; + ir_insn *insn; - IR_ASSERT(insn->type == IR_DOUBLE); - if (IR_IS_CONST_REF(ref)) { - return ir_const_float(ctx, (float)insn->val.d); - } else { - switch (insn->op) { - case IR_FP2FP: - count = ctx->use_lists[ref].count; - ir_use_list_remove_all(ctx, ref, use); - if (ctx->use_lists[ref].count == 0) { - ir_use_list_replace_one(ctx, insn->op1, ref, use); - if (count > 1) { - do { - ir_use_list_add(ctx, insn->op1, use); - } while (--count > 1); - } - ref = insn->op1; - MAKE_NOP(insn); - return ref; - } else { - ir_use_list_add(ctx, insn->op1, 
use); - count -= ctx->use_lists[ref].count; - if (count > 1) { - do { - ir_use_list_add(ctx, insn->op1, use); - } while (--count > 1); - } - } - return insn->op1; -// case IR_INT2FP: -// insn->type = IR_FLOAT; -// return ref; - case IR_NEG: - case IR_ABS: - insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); - insn->type = IR_FLOAT; - return ref; - case IR_ADD: - case IR_SUB: - case IR_MUL: - case IR_DIV: - case IR_MIN: - case IR_MAX: - if (insn->op1 == insn->op2) { - insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); - } else { - insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); - insn->op2 = ir_promote_d2f(ctx, insn->op2, ref); - } - insn->type = IR_FLOAT; - return ref; - default: - break; + CLEAR_USES(ref); + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + j; j <= n; j++, p++) { + ir_ref input = *p; + *p = IR_UNUSED; + if (input > 0) { + ir_use_list_remove_all(ctx, input, ref); + if (ir_is_dead(ctx, input)) { + /* schedule DCE */ + ir_bitqueue_add(worklist, input); + } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { + /* try to optimize PHI into ABS/MIN/MAX/COND */ + ir_bitqueue_add(worklist, ctx->ir_base[input].op1); + } } } - IR_ASSERT(0); - return ref; } -static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use) +static void ir_iter_replace_insn(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) { - ir_insn *insn = &ctx->ir_base[ref]; - uint32_t count; - ir_ref old_ref; + ir_ref i, j, n, *p, use; + ir_insn *insn; + ir_use_list *use_list; - IR_ASSERT(insn->type == IR_FLOAT); - if (IR_IS_CONST_REF(ref)) { - return ir_const_double(ctx, (double)insn->val.f); + IR_ASSERT(ref != new_ref); + + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ir_ref input = *p; + *p = IR_UNUSED; + if (input > 0) { + ir_use_list_remove_all(ctx, input, ref); + if (ir_is_dead(ctx, input)) { + /* schedule DCE */ + ir_bitqueue_add(worklist, input); + } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { + /* try to optimize PHI into ABS/MIN/MAX/COND */ + ir_bitqueue_add(worklist, input); + } + } + } + + use_list = &ctx->use_lists[ref]; + n = use_list->count; + p = &ctx->use_edges[use_list->refs]; + if (new_ref <= 0) { + /* constant or IR_UNUSED */ + for (; n; p++, n--) { + use = *p; + IR_ASSERT(use != ref); + insn = &ctx->ir_base[use]; + i = ir_insn_find_op(insn, ref); + IR_ASSERT(i > 0); + ir_insn_set_op(insn, i, new_ref); + /* schedule folding */ + ir_bitqueue_add(worklist, use); + } } else { - switch (insn->op) { - case IR_FP2FP: - count = ctx->use_lists[ref].count; - ir_use_list_remove_all(ctx, ref, use); - if (ctx->use_lists[ref].count == 0) { - ir_use_list_replace_one(ctx, insn->op1, ref, use); - if (count > 1) { - do { - ir_use_list_add(ctx, insn->op1, use); - } while (--count > 1); + for (j = 0; j < n; j++, p++) { + use = *p; + IR_ASSERT(use != ref); + insn = &ctx->ir_base[use]; + i = ir_insn_find_op(insn, ref); + IR_ASSERT(i > 0); + ir_insn_set_op(insn, i, new_ref); + if (ir_use_list_add(ctx, new_ref, use)) { + /* restore after reallocation */ + use_list = &ctx->use_lists[ref]; + n = use_list->count; + p = &ctx->use_edges[use_list->refs + j]; + } + /* schedule folding */ + ir_bitqueue_add(worklist, use); + } + } + CLEAR_USES(ref); +} + +static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) +{ + uint32_t opt; + 
ir_ref op1, op2, op3; + ir_insn *op1_insn, *op2_insn, *op3_insn, *insn; + + insn = &ctx->ir_base[ref]; + opt = insn->opt; + op1 = insn->op1; + op2 = insn->op2; + op3 = insn->op3; + +restart: + op1_insn = ctx->ir_base + op1; + op2_insn = ctx->ir_base + op2; + op3_insn = ctx->ir_base + op3; + + switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) { + case IR_FOLD_DO_RESTART: + opt = ctx->fold_insn.optx; + op1 = ctx->fold_insn.op1; + op2 = ctx->fold_insn.op2; + op3 = ctx->fold_insn.op3; + goto restart; + case IR_FOLD_DO_EMIT: + insn = &ctx->ir_base[ref]; + if (insn->opt != ctx->fold_insn.opt + || insn->op1 != ctx->fold_insn.op1 + || insn->op2 != ctx->fold_insn.op2 + || insn->op3 != ctx->fold_insn.op3) { + + ir_use_list *use_list; + ir_ref n, j, *p, use; + + insn->optx = ctx->fold_insn.opt; + IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK])); + insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]); + if (insn->op1 != ctx->fold_insn.op1) { + if (insn->op1 > 0) { + ir_use_list_remove_one(ctx, insn->op1, ref); } - ref = insn->op1; - MAKE_NOP(insn); - return ref; - } else { - ir_use_list_add(ctx, insn->op1, use); - count -= ctx->use_lists[ref].count; - if (count > 1) { - do { - ir_use_list_add(ctx, insn->op1, use); - } while (--count > 1); + if (ctx->fold_insn.op1 > 0) { + ir_use_list_add(ctx, ctx->fold_insn.op1, ref); } } - return insn->op1; - case IR_INT2FP: - old_ref = ir_find1(ctx, IR_OPTX(IR_INT2FP, IR_DOUBLE, 1), insn->op1); - if (old_ref) { - IR_ASSERT(ctx->use_lists[ref].count == 1); - ir_use_list_remove_one(ctx, insn->op1, ref); - CLEAR_USES(ref); - MAKE_NOP(insn); - ir_use_list_add(ctx, old_ref, use); - return old_ref; + if (insn->op2 != ctx->fold_insn.op2) { + if (insn->op2 > 0) { + ir_use_list_remove_one(ctx, insn->op2, ref); + } + if (ctx->fold_insn.op2 > 0) { + ir_use_list_add(ctx, ctx->fold_insn.op2, ref); + } } - insn->type = IR_DOUBLE; - return ref; + if (insn->op3 != ctx->fold_insn.op3) { + if (insn->op3 > 0) { + ir_use_list_remove_one(ctx, insn->op3, ref); + } + if (ctx->fold_insn.op3 > 0) { + ir_use_list_add(ctx, ctx->fold_insn.op3, ref); + } + } + insn->op1 = ctx->fold_insn.op1; + insn->op2 = ctx->fold_insn.op2; + insn->op3 = ctx->fold_insn.op3; + + use_list = &ctx->use_lists[ref]; + n = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + ir_bitqueue_add(worklist, use); + } + } + break; + case IR_FOLD_DO_COPY: + op1 = ctx->fold_insn.op1; + ir_iter_replace_insn(ctx, ref, op1, worklist); + break; + case IR_FOLD_DO_CONST: + op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type); + ir_iter_replace_insn(ctx, ref, op1, worklist); + break; + default: + IR_ASSERT(0); + break; + } +} + +static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref) +{ + ir_insn *insn = &ctx->ir_base[ref]; + + IR_ASSERT(insn->type == IR_DOUBLE); + if (IR_IS_CONST_REF(ref)) { + return !IR_IS_SYM_CONST(insn->op) && insn->val.d == (double)(float)insn->val.d; + } else { + switch (insn->op) { + case IR_FP2FP: + return 1; +// case IR_INT2FP: +// return ctx->use_lists[ref].count == 1; case IR_NEG: case IR_ABS: - insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); - insn->type = IR_DOUBLE; - return ref; + return ctx->use_lists[ref].count == 1 && + ir_may_promote_d2f(ctx, insn->op1); case IR_ADD: case IR_SUB: case IR_MUL: -// case IR_DIV: + case IR_DIV: case IR_MIN: case IR_MAX: - if (insn->op1 == insn->op2) { - insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); - } else { - insn->op1 = 
ir_promote_f2d(ctx, insn->op1, ref); - insn->op2 = ir_promote_f2d(ctx, insn->op2, ref); - } - insn->type = IR_DOUBLE; - return ref; + return ctx->use_lists[ref].count == 1 && + ir_may_promote_d2f(ctx, insn->op1) && + ir_may_promote_d2f(ctx, insn->op2); default: break; } } - IR_ASSERT(0); - return ref; + return 0; } -static bool ir_may_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref) +static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref) { ir_insn *insn = &ctx->ir_base[ref]; + IR_ASSERT(insn->type == IR_FLOAT); if (IR_IS_CONST_REF(ref)) { - return !IR_IS_SYM_CONST(insn->op); + return !IR_IS_SYM_CONST(insn->op) && insn->val.f == (float)(double)insn->val.f; } else { switch (insn->op) { - case IR_ZEXT: - case IR_SEXT: - return ctx->ir_base[insn->op1].type == type; + case IR_FP2FP: + return 1; + case IR_INT2FP: + return ctx->use_lists[ref].count == 1; case IR_NEG: case IR_ABS: - case IR_NOT: return ctx->use_lists[ref].count == 1 && - ir_may_promote_i2i(ctx, type, insn->op1); + ir_may_promote_f2d(ctx, insn->op1); case IR_ADD: case IR_SUB: case IR_MUL: // case IR_DIV: case IR_MIN: case IR_MAX: - case IR_OR: - case IR_AND: - case IR_XOR: return ctx->use_lists[ref].count == 1 && - ir_may_promote_i2i(ctx, type, insn->op1) && - ir_may_promote_i2i(ctx, type, insn->op2); + ir_may_promote_f2d(ctx, insn->op1) && + ir_may_promote_f2d(ctx, insn->op2); default: break; } @@ -989,7 +1243,178 @@ static bool ir_may_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref) return 0; } -static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use) +static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use) +{ + ir_insn *insn = &ctx->ir_base[ref]; + uint32_t count; + + IR_ASSERT(insn->type == IR_DOUBLE); + if (IR_IS_CONST_REF(ref)) { + return ir_const_float(ctx, (float)insn->val.d); + } else { + switch (insn->op) { + case IR_FP2FP: + count = ctx->use_lists[ref].count; + ir_use_list_remove_all(ctx, ref, use); + if (ctx->use_lists[ref].count == 0) { + ir_use_list_replace_one(ctx, insn->op1, ref, use); + if (count > 1) { + do { + ir_use_list_add(ctx, insn->op1, use); + } while (--count > 1); + } + ref = insn->op1; + MAKE_NOP(insn); + return ref; + } else { + ir_use_list_add(ctx, insn->op1, use); + count -= ctx->use_lists[ref].count; + if (count > 1) { + do { + ir_use_list_add(ctx, insn->op1, use); + } while (--count > 1); + } + } + return insn->op1; +// case IR_INT2FP: +// insn->type = IR_FLOAT; +// return ref; + case IR_NEG: + case IR_ABS: + insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); + insn->type = IR_FLOAT; + return ref; + case IR_ADD: + case IR_SUB: + case IR_MUL: + case IR_DIV: + case IR_MIN: + case IR_MAX: + if (insn->op1 == insn->op2) { + insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); + } else { + insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); + insn->op2 = ir_promote_d2f(ctx, insn->op2, ref); + } + insn->type = IR_FLOAT; + return ref; + default: + break; + } + } + IR_ASSERT(0); + return ref; +} + +static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use) +{ + ir_insn *insn = &ctx->ir_base[ref]; + uint32_t count; + ir_ref old_ref; + + IR_ASSERT(insn->type == IR_FLOAT); + if (IR_IS_CONST_REF(ref)) { + return ir_const_double(ctx, (double)insn->val.f); + } else { + switch (insn->op) { + case IR_FP2FP: + count = ctx->use_lists[ref].count; + ir_use_list_remove_all(ctx, ref, use); + if (ctx->use_lists[ref].count == 0) { + ir_use_list_replace_one(ctx, insn->op1, ref, use); + if (count > 1) { + do { + ir_use_list_add(ctx, insn->op1, use); + } while (--count > 1); + } 
+ ref = insn->op1; + MAKE_NOP(insn); + return ref; + } else { + ir_use_list_add(ctx, insn->op1, use); + count -= ctx->use_lists[ref].count; + if (count > 1) { + do { + ir_use_list_add(ctx, insn->op1, use); + } while (--count > 1); + } + } + return insn->op1; + case IR_INT2FP: + old_ref = ir_find1(ctx, IR_OPTX(IR_INT2FP, IR_DOUBLE, 1), insn->op1); + if (old_ref) { + IR_ASSERT(ctx->use_lists[ref].count == 1); + ir_use_list_remove_one(ctx, insn->op1, ref); + CLEAR_USES(ref); + MAKE_NOP(insn); + ir_use_list_add(ctx, old_ref, use); + return old_ref; + } + insn->type = IR_DOUBLE; + return ref; + case IR_NEG: + case IR_ABS: + insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); + insn->type = IR_DOUBLE; + return ref; + case IR_ADD: + case IR_SUB: + case IR_MUL: +// case IR_DIV: + case IR_MIN: + case IR_MAX: + if (insn->op1 == insn->op2) { + insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); + } else { + insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); + insn->op2 = ir_promote_f2d(ctx, insn->op2, ref); + } + insn->type = IR_DOUBLE; + return ref; + default: + break; + } + } + IR_ASSERT(0); + return ref; +} + +static bool ir_may_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref) +{ + ir_insn *insn = &ctx->ir_base[ref]; + + if (IR_IS_CONST_REF(ref)) { + return !IR_IS_SYM_CONST(insn->op); + } else { + switch (insn->op) { + case IR_ZEXT: + case IR_SEXT: + return ctx->ir_base[insn->op1].type == type; + case IR_NEG: + case IR_ABS: + case IR_NOT: + return ctx->use_lists[ref].count == 1 && + ir_may_promote_i2i(ctx, type, insn->op1); + case IR_ADD: + case IR_SUB: + case IR_MUL: +// case IR_DIV: + case IR_MIN: + case IR_MAX: + case IR_OR: + case IR_AND: + case IR_XOR: + return ctx->use_lists[ref].count == 1 && + ir_may_promote_i2i(ctx, type, insn->op1) && + ir_may_promote_i2i(ctx, type, insn->op2); + default: + break; + } + } + return 0; +} + +static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use) { ir_insn *insn = &ctx->ir_base[ref]; uint32_t count; @@ -1201,7 +1626,7 @@ static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bi } } - ir_sccp_replace_insn2(ctx, ext_ref, ref, worklist); + ir_iter_replace_insn(ctx, ext_ref, ref, worklist); phi_insn = &ctx->ir_base[ref]; if (IR_IS_CONST_REF(phi_insn->op2) @@ -2061,495 +2486,171 @@ static bool ir_try_split_if_cmp(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqu } else { /* IF Split * - * | | | | - * | END | END - * END / END | - * | +---+ | | - * | / | | - * MERGE | | - * | \ | | - * | PHI(C1, X) | | - * | | | + - * | CMP(_, C2) | / - * | / | / - * IF => | / - * | \ | / - * | +------+ | / - * | IF_TRUE | / BEGIN(unreachable) - * IF_FALSE | MERGE | - * | | - */ - - ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref); - ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref); - - MAKE_NOP(merge); CLEAR_USES(merge_ref); - MAKE_NOP(phi); CLEAR_USES(phi_ref); - MAKE_NOP(cond); CLEAR_USES(cond_ref); - MAKE_NOP(insn); CLEAR_USES(ref); - - if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); - if_false->op1 = end1_ref; - if_false->op2 = end2_ref; - - if_true->optx = IR_BEGIN; - if_true->op1 = IR_UNUSED; - - ctx->flags2 &= ~IR_CFG_REACHABLE; - - ir_bitqueue_add(worklist, if_false_ref); - - return 1; - } - } else { - /* IF Split - * - * | | | | - * | END | IF<----+ - * END / END / \ | - * | +---+ | +--+ + | - * | / | / | | - * MERGE | IF_FALSE | | - * | \ | | | | - * | PHI(C1, X) | | | | - * | | | | | | - * | CMP(_, C2) | | | CMP(X, C2) - * | / | | | - * IF => | END | - * | \ | | | - * | +------+ | | | - 
* | IF_TRUE | | IF_TRUE - * IF_FALSE | MERGE - * | | - */ - - ir_use_list_remove_all(ctx, merge_ref, phi_ref); - ir_use_list_remove_all(ctx, ref, if_true_ref); - if (!IR_IS_CONST_REF(phi->op3)) { - ir_use_list_replace_one(ctx, phi->op3, phi_ref, insn->op2); - } - ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref); - ir_use_list_replace_one(ctx, cond_ref, ref, end2_ref); - ir_use_list_add(ctx, end2_ref, if_true_ref); - - end2->optx = IR_OPTX(IR_IF, IR_VOID, 2); - end2->op2 = insn->op2; - - merge->optx = IR_OPTX(op, IR_VOID, 1); - merge->op1 = end2_ref; - merge->op2 = IR_UNUSED; - - cond->op1 = phi->op3; - MAKE_NOP(phi); - CLEAR_USES(phi_ref); - - insn->optx = IR_OPTX(IR_END, IR_VOID, 1); - insn->op1 = merge_ref; - insn->op2 = IR_UNUSED; - - if_true->op1 = end2_ref; - - if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); - if_false->op1 = end1_ref; - if_false->op2 = ref; - - ir_bitqueue_add(worklist, if_false_ref); - if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) { - ir_bitqueue_add(worklist, end2->op1); - } - - return 1; - } - } - } - } - } - - return 0; -} - -static void ir_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_bitqueue *worklist) -{ - ir_use_list *use_list = &ctx->use_lists[merge_ref]; - - if (use_list->count == 1) { - ir_try_remove_empty_diamond(ctx, merge_ref, merge, worklist); - } else if (use_list->count == 2) { - if (merge->inputs_count == 2) { - ir_ref phi_ref = ctx->use_edges[use_list->refs]; - ir_insn *phi = &ctx->ir_base[phi_ref]; - - ir_ref next_ref = ctx->use_edges[use_list->refs + 1]; - ir_insn *next = &ctx->ir_base[next_ref]; - - if (next->op == IR_PHI) { - SWAP_REFS(phi_ref, next_ref); - SWAP_INSNS(phi, next); - } - - if (phi->op == IR_PHI && next->op != IR_PHI) { - if (next->op == IR_IF && next->op1 == merge_ref && ctx->use_lists[phi_ref].count == 1) { - if (next->op2 == phi_ref) { - if (ir_try_split_if(ctx, next_ref, next, worklist)) { - return; - } - } else { - ir_insn *cmp = &ctx->ir_base[next->op2]; - - if (cmp->op >= IR_EQ && cmp->op <= IR_UGT - && cmp->op1 == phi_ref - && IR_IS_CONST_REF(cmp->op2) - && !IR_IS_SYM_CONST(ctx->ir_base[cmp->op2].op) - && ctx->use_lists[next->op2].count == 1) { - if (ir_try_split_if_cmp(ctx, next_ref, next, worklist)) { - return; - } - } - } - } - ir_optimize_phi(ctx, merge_ref, merge, phi_ref, phi, worklist); - } - } - } -} - -int ir_sccp(ir_ctx *ctx) -{ - ir_ref i, j, n, *p, use; - ir_use_list *use_list; - ir_insn *insn, *use_insn, *value; - uint32_t flags; - ir_bitqueue worklist, worklist2; - ir_insn *_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn)); - - ctx->flags2 |= IR_OPT_IN_SCCP; - - /* A bit modified SCCP algorithm of M. N. Wegman and F. K. 
Zadeck */ - ir_bitqueue_init(&worklist2, ctx->insns_count); - ir_bitqueue_init(&worklist, ctx->insns_count); - worklist.pos = 0; - ir_bitset_incl(worklist.set, 1); - while ((i = ir_bitqueue_pop(&worklist)) >= 0) { - insn = &ctx->ir_base[i]; - flags = ir_op_flags[insn->op]; - if (flags & IR_OP_FLAG_DATA) { - if (ctx->use_lists[i].count == 0) { - /* dead code */ - continue; - } else if (insn->op == IR_PHI) { - if (!ir_sccp_meet_phi(ctx, _values, i, insn, &worklist)) { - continue; - } - } else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) { - bool may_benefit = 0; - bool has_top = 0; - - IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags)); - n = IR_INPUT_EDGES_COUNT(flags); - for (p = insn->ops + 1; n > 0; p++, n--) { - ir_ref input = *p; - if (input > 0) { - if (_values[input].optx == IR_TOP) { - has_top = 1; - /* do backward propagaton only once */ - if (!_values[input].op1) { - _values[input].op1 = 1; - ir_bitqueue_add(&worklist, input); - } - } else if (_values[input].optx != IR_BOTTOM) { - /* Perform folding only if some of direct inputs - * is going to be replaced by a constant or copy. - * This approach may miss some folding optimizations - * dependent on indirect inputs. e.g. reassociation. - */ - may_benefit = 1; - } - } - } - if (has_top) { - continue; - } - if (!may_benefit) { - IR_MAKE_BOTTOM(i); - if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC - || insn->op == IR_ZEXT || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) { - ir_bitqueue_add(&worklist2, i); - } - } else if (!ir_sccp_fold(ctx, _values, i, insn->opt, insn->op1, insn->op2, insn->op3)) { - /* not changed */ - continue; - } else if (_values[i].optx == IR_BOTTOM) { - insn = &ctx->ir_base[i]; - if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC - || insn->op == IR_ZEXT || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) { - ir_bitqueue_add(&worklist2, i); - } - } - } else { - IR_MAKE_BOTTOM(i); - } - } else if (flags & IR_OP_FLAG_BB_START) { - if (insn->op == IR_MERGE || insn->op == IR_BEGIN) { - ir_bitqueue_add(&worklist2, i); - } - if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { - ir_ref unfeasible_inputs = 0; - - n = insn->inputs_count; - if (n > 3 && _values[i].optx == IR_TOP) { - for (j = 0; j < (n>>2); j++) { - _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ - } - } - for (p = insn->ops + 1; n > 0; p++, n--) { - ir_ref input = *p; - IR_ASSERT(input > 0); - if (_values[input].optx == IR_TOP) { - unfeasible_inputs++; - } - } - if (unfeasible_inputs == 0) { - IR_MAKE_BOTTOM(i); - } else if (_values[i].op1 != unfeasible_inputs) { - _values[i].optx = IR_MERGE; - _values[i].op1 = unfeasible_inputs; - } else { - continue; - } - } else { - IR_ASSERT(insn->op == IR_START || IR_IS_FEASIBLE(insn->op1)); - IR_MAKE_BOTTOM(i); - } - } else { - IR_ASSERT(insn->op1 > 0); - if (_values[insn->op1].optx == IR_TOP) { - /* control inpt is not feasible */ - continue; - } - if (insn->op == IR_IF) { - if (IR_IS_TOP(insn->op2)) { - /* do backward propagaton only once */ - if (!_values[insn->op2].op1) { - _values[insn->op2].op1 = 1; - ir_bitqueue_add(&worklist, insn->op2); - } - continue; - } - if (!IR_IS_BOTTOM(insn->op2) -#if IR_COMBO_COPY_PROPAGATION - && (IR_IS_CONST_REF(insn->op2) || _values[insn->op2].op != IR_COPY) -#endif - ) { - bool b = ir_sccp_is_true(ctx, _values, insn->op2); - use_list = &ctx->use_lists[i]; - IR_ASSERT(use_list->count == 2); - p = &ctx->use_edges[use_list->refs]; - use = *p; - use_insn = 
&ctx->ir_base[use]; - IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE); - if ((use_insn->op == IR_IF_TRUE) != b) { - use = *(p+1); - IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE); - } - if (_values[i].optx == IR_TOP) { - _values[i].optx = IR_IF; - _values[i].op1 = use; - } else if (_values[i].optx != IR_IF || _values[i].op1 != use) { - IR_MAKE_BOTTOM(i); - } - if (!IR_IS_BOTTOM(use)) { - ir_bitqueue_add(&worklist, use); - } - continue; - } - IR_MAKE_BOTTOM(i); - } else if (insn->op == IR_SWITCH) { - if (IR_IS_TOP(insn->op2)) { - /* do backward propagaton only once */ - if (!_values[insn->op2].op1) { - _values[insn->op2].op1 = 1; - ir_bitqueue_add(&worklist, insn->op2); - } - continue; - } - if (!IR_IS_BOTTOM(insn->op2) -#if IR_COMBO_COPY_PROPAGATION - && (IR_IS_CONST_REF(insn->op2) || _values[insn->op2].op != IR_COPY) -#endif - ) { - ir_ref use_case = IR_UNUSED; + * | | | | + * | END | END + * END / END | + * | +---+ | | + * | / | | + * MERGE | | + * | \ | | + * | PHI(C1, X) | | + * | | | + + * | CMP(_, C2) | / + * | / | / + * IF => | / + * | \ | / + * | +------+ | / + * | IF_TRUE | / BEGIN(unreachable) + * IF_FALSE | MERGE | + * | | + */ - use_list = &ctx->use_lists[i]; - n = use_list->count; - for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { - use = *p; - IR_ASSERT(use > 0); - use_insn = &ctx->ir_base[use]; - if (use_insn->op == IR_CASE_VAL) { - if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) { - use_case = use; - break; - } - } else if (use_insn->op == IR_CASE_DEFAULT) { - use_case = use; - } - } - if (use_case) { - use_insn = &ctx->ir_base[use_case]; - if (_values[i].optx == IR_TOP) { - _values[i].optx = IR_IF; - _values[i].op1 = use_case; - } else if (_values[i].optx != IR_IF || _values[i].op1 != use_case) { - IR_MAKE_BOTTOM(i); - } - if (!IR_IS_BOTTOM(use_case)) { - ir_bitqueue_add(&worklist, use_case); + ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref); + ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref); + + MAKE_NOP(merge); CLEAR_USES(merge_ref); + MAKE_NOP(phi); CLEAR_USES(phi_ref); + MAKE_NOP(cond); CLEAR_USES(cond_ref); + MAKE_NOP(insn); CLEAR_USES(ref); + + if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); + if_false->op1 = end1_ref; + if_false->op2 = end2_ref; + + if_true->optx = IR_BEGIN; + if_true->op1 = IR_UNUSED; + + ctx->flags2 &= ~IR_CFG_REACHABLE; + + ir_bitqueue_add(worklist, if_false_ref); + + return 1; } - } - if (!IR_IS_BOTTOM(i)) { - continue; - } - } - IR_MAKE_BOTTOM(i); - } else if (ir_is_dead_load_ex(ctx, i, flags, insn)) { - /* dead load */ - _values[i].optx = IR_LOAD; - } else { - IR_MAKE_BOTTOM(i); + } else { + /* IF Split + * + * | | | | + * | END | IF<----+ + * END / END / \ | + * | +---+ | +--+ + | + * | / | / | | + * MERGE | IF_FALSE | | + * | \ | | | | + * | PHI(C1, X) | | | | + * | | | | | | + * | CMP(_, C2) | | | CMP(X, C2) + * | / | | | + * IF => | END | + * | \ | | | + * | +------+ | | | + * | IF_TRUE | | IF_TRUE + * IF_FALSE | MERGE + * | | + */ - /* control, call, load and store instructions may have unprocessed inputs */ - n = IR_INPUT_EDGES_COUNT(flags); - if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) { - for (j = 0; j < (n>>2); j++) { - _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ - } - for (j = 2, p = insn->ops + j; j <= n; j++, p++) { - IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); - use = *p; - if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { - 
ir_bitqueue_add(&worklist, use); + ir_use_list_remove_all(ctx, merge_ref, phi_ref); + ir_use_list_remove_all(ctx, ref, if_true_ref); + if (!IR_IS_CONST_REF(phi->op3)) { + ir_use_list_replace_one(ctx, phi->op3, phi_ref, insn->op2); } - } - } else if (n >= 2) { - IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA); - use = insn->op2; - if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { - ir_bitqueue_add(&worklist, use); - } - if (n > 2) { - IR_ASSERT(n == 3); - IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA); - use = insn->op3; - if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { - ir_bitqueue_add(&worklist, use); + ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref); + ir_use_list_replace_one(ctx, cond_ref, ref, end2_ref); + ir_use_list_add(ctx, end2_ref, if_true_ref); + + end2->optx = IR_OPTX(IR_IF, IR_VOID, 2); + end2->op2 = insn->op2; + + merge->optx = IR_OPTX(op, IR_VOID, 1); + merge->op1 = end2_ref; + merge->op2 = IR_UNUSED; + + cond->op1 = phi->op3; + MAKE_NOP(phi); + CLEAR_USES(phi_ref); + + insn->optx = IR_OPTX(IR_END, IR_VOID, 1); + insn->op1 = merge_ref; + insn->op2 = IR_UNUSED; + + if_true->op1 = end2_ref; + + if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); + if_false->op1 = end1_ref; + if_false->op2 = ref; + + ir_bitqueue_add(worklist, if_false_ref); + if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) { + ir_bitqueue_add(worklist, end2->op1); } + + return 1; } } } } - use_list = &ctx->use_lists[i]; - n = use_list->count; - for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { - use = *p; - if (_values[use].optx != IR_BOTTOM) { - ir_bitqueue_add(&worklist, use); - } - } } -#ifdef IR_DEBUG - if (ctx->flags & IR_DEBUG_SCCP) { - for (i = 1; i < ctx->insns_count; i++) { - if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) { - fprintf(stderr, "%d. CONST(", i); - ir_print_const(ctx, &_values[i], stderr, true); - fprintf(stderr, ")\n"); -#if IR_COMBO_COPY_PROPAGATION - } else if (_values[i].op == IR_COPY) { - fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1); -#endif - } else if (IR_IS_TOP(i)) { - fprintf(stderr, "%d. TOP\n", i); - } else if (_values[i].op == IR_IF) { - fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1); - } else if (_values[i].op == IR_MERGE) { - fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1); - } else if (!IR_IS_BOTTOM(i)) { - fprintf(stderr, "%d. 
%d\n", i, _values[i].op); + return 0; +} + +static void ir_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_bitqueue *worklist) +{ + ir_use_list *use_list = &ctx->use_lists[merge_ref]; + + if (use_list->count == 1) { + ir_try_remove_empty_diamond(ctx, merge_ref, merge, worklist); + } else if (use_list->count == 2) { + if (merge->inputs_count == 2) { + ir_ref phi_ref = ctx->use_edges[use_list->refs]; + ir_insn *phi = &ctx->ir_base[phi_ref]; + + ir_ref next_ref = ctx->use_edges[use_list->refs + 1]; + ir_insn *next = &ctx->ir_base[next_ref]; + + if (next->op == IR_PHI) { + SWAP_REFS(phi_ref, next_ref); + SWAP_INSNS(phi, next); } - } - } -#endif - for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) { - if (value->op == IR_BOTTOM) { - continue; - } else if (IR_IS_CONST_OP(value->op)) { - /* replace instruction by constant */ - j = ir_const(ctx, value->val, value->type); - ir_sccp_replace_insn(ctx, _values, i, j, &worklist2); - } else if (IR_IS_SYM_CONST(value->op)) { - /* replace instruction by constant */ - j = ir_const_ex(ctx, value->val, value->type, value->optx); - ir_sccp_replace_insn(ctx, _values, i, j, &worklist2); -#if IR_COMBO_COPY_PROPAGATION - } else if (value->op == IR_COPY) { - ir_sccp_replace_insn(ctx, _values, i, value->op1, &worklist2); -#endif - } else if (value->op == IR_TOP) { - /* remove unreachable instruction */ - insn = &ctx->ir_base[i]; - if (insn->op == IR_NOP) { - /* already removed */ - } else if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) { - if (insn->op != IR_PARAM && (insn->op != IR_VAR || _values[insn->op1].op == IR_TOP)) { - ir_sccp_remove_insn(ctx, _values, i, &worklist2); - } - } else { - if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) { - /* remove from terminators list */ - ir_ref prev = ctx->ir_base[1].op1; - if (prev == i) { - ctx->ir_base[1].op1 = insn->op3; + if (phi->op == IR_PHI && next->op != IR_PHI) { + if (next->op == IR_IF && next->op1 == merge_ref && ctx->use_lists[phi_ref].count == 1) { + if (next->op2 == phi_ref) { + if (ir_try_split_if(ctx, next_ref, next, worklist)) { + return; + } } else { - while (prev) { - if (ctx->ir_base[prev].op3 == i) { - ctx->ir_base[prev].op3 = insn->op3; - break; + ir_insn *cmp = &ctx->ir_base[next->op2]; + + if (cmp->op >= IR_EQ && cmp->op <= IR_UGT + && cmp->op1 == phi_ref + && IR_IS_CONST_REF(cmp->op2) + && !IR_IS_SYM_CONST(ctx->ir_base[cmp->op2].op) + && ctx->use_lists[next->op2].count == 1) { + if (ir_try_split_if_cmp(ctx, next_ref, next, worklist)) { + return; } - prev = ctx->ir_base[prev].op3; } } } - ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, &worklist2); + ir_optimize_phi(ctx, merge_ref, merge, phi_ref, phi, worklist); } - } else if (value->op == IR_IF) { - /* remove one way IF/SWITCH */ - ir_sccp_remove_if(ctx, _values, i, value->op1); - } else if (value->op == IR_MERGE) { - /* schedule merge to remove unfeasible MERGE inputs */ - ir_bitqueue_add(&worklist, i); - } else if (value->op == IR_LOAD) { - /* schedule dead load elimination */ - ir_bitqueue_add(&worklist2, i); } } +} - while ((i = ir_bitqueue_pop(&worklist)) >= 0) { - IR_ASSERT(_values[i].op == IR_MERGE); - ir_sccp_remove_unfeasible_merge_inputs(ctx, _values, i, _values[i].op1); - } - - ctx->flags2 |= IR_CFG_REACHABLE; +static void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist) +{ + ir_ref i; + ir_insn *insn; - while ((i = ir_bitqueue_pop(&worklist2)) >= 0) { + while ((i = ir_bitqueue_pop(worklist)) >= 0) { insn = &ctx->ir_base[i]; if (IR_IS_FOLDABLE_OP(insn->op)) { if 
(ctx->use_lists[i].count == 0) { if (insn->op == IR_PHI) { - ir_bitqueue_add(&worklist2, insn->op1); + ir_bitqueue_add(worklist, insn->op1); } - ir_sccp_remove_insn2(ctx, i, &worklist2); + ir_iter_remove_insn(ctx, i, worklist); } else { insn = &ctx->ir_base[i]; switch (insn->op) { @@ -2558,14 +2659,14 @@ int ir_sccp(ir_ctx *ctx) if (ir_may_promote_d2f(ctx, insn->op1)) { ir_ref ref = ir_promote_d2f(ctx, insn->op1, i); insn->op1 = ref; - ir_sccp_replace_insn2(ctx, i, ref, &worklist2); + ir_iter_replace_insn(ctx, i, ref, worklist); break; } } else { if (ir_may_promote_f2d(ctx, insn->op1)) { ir_ref ref = ir_promote_f2d(ctx, insn->op1, i); insn->op1 = ref; - ir_sccp_replace_insn2(ctx, i, ref, &worklist2); + ir_iter_replace_insn(ctx, i, ref, worklist); break; } } @@ -2585,13 +2686,13 @@ int ir_sccp(ir_ctx *ctx) if (ir_may_promote_i2i(ctx, insn->type, insn->op1)) { ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i); insn->op1 = ref; - ir_sccp_replace_insn2(ctx, i, ref, &worklist2); + ir_iter_replace_insn(ctx, i, ref, worklist); break; } goto folding; case IR_SEXT: case IR_ZEXT: - if (ir_try_promote_ext(ctx, i, insn, &worklist2)) { + if (ir_try_promote_ext(ctx, i, insn, worklist)) { break; } goto folding; @@ -2599,7 +2700,7 @@ int ir_sccp(ir_ctx *ctx) break; default: folding: - ir_sccp_fold2(ctx, i, &worklist2); + ir_iter_fold(ctx, i, worklist); break; } } @@ -2609,10 +2710,10 @@ int ir_sccp(ir_ctx *ctx) } else if (insn->op == IR_BEGIN) { if (ctx->ir_base[insn->op1].op == IR_END && ctx->use_lists[i].count == 1) { - ir_merge_blocks(ctx, insn->op1, i, &worklist2); + ir_merge_blocks(ctx, insn->op1, i, worklist); } } else if (insn->op == IR_MERGE) { - ir_optimize_merge(ctx, i, insn, &worklist2); + ir_optimize_merge(ctx, i, insn, worklist); } } else if (ir_is_dead_load(ctx, i)) { ir_ref next = ctx->use_edges[ctx->use_lists[i].refs]; @@ -2621,16 +2722,34 @@ int ir_sccp(ir_ctx *ctx) ctx->ir_base[next].op1 = insn->op1; ir_use_list_replace_one(ctx, insn->op1, i, next); insn->op1 = IR_UNUSED; - ir_sccp_remove_insn2(ctx, i, &worklist2); + ir_iter_remove_insn(ctx, i, worklist); } } +} + +int ir_sccp(ir_ctx *ctx) +{ + ir_bitqueue sccp_worklist, iter_worklist; + ir_insn *_values; + + ctx->flags2 |= IR_OPT_IN_SCCP; + ir_bitqueue_init(&iter_worklist, ctx->insns_count); + ir_bitqueue_init(&sccp_worklist, ctx->insns_count); + _values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn)); + + ir_sccp_analyze(ctx, _values, &sccp_worklist, &iter_worklist); + ir_sccp_transform(ctx, _values, &sccp_worklist, &iter_worklist); ir_mem_free(_values); - ir_bitqueue_free(&worklist); - ir_bitqueue_free(&worklist2); + ir_bitqueue_free(&sccp_worklist); + + ctx->flags2 |= IR_CFG_REACHABLE; + + ir_iter_opt(ctx, &iter_worklist); + + ir_bitqueue_free(&iter_worklist); ctx->flags2 &= ~IR_OPT_IN_SCCP; - ctx->flags2 |= IR_SCCP_DONE; return 1; } diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 58c6ed40f7df2..dce15b5be3b89 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -3090,9 +3090,16 @@ static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg base = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - int32_t offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[src].op3); + ir_insn *var_insn; + int32_t offset; IR_ASSERT(ir_rule(ctx, src) == IR_STATIC_ALLOCA); + var_insn = &ctx->ir_base[src]; + if (var_insn->op == IR_VADDR) { + var_insn = &ctx->ir_base[var_insn->op1]; + } + IR_ASSERT(var_insn->op == IR_VAR || var_insn->op == IR_ALLOCA); + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); if (offset == 0) { | mov Ra(reg), Ra(base) } else { @@ -7569,7 +7576,11 @@ static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) mem = ir_var_spill_slot(ctx, insn->op1); fp = IR_MEM_BASE(mem); offset = IR_MEM_OFFSET(mem); - | lea Ra(def_reg), aword [Ra(fp)+offset] + if (offset == 0) { + | mov Ra(def_reg), Ra(fp) + } else { + | lea Ra(def_reg), aword [Ra(fp)+offset] + } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } @@ -10237,10 +10248,15 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); ir_ref *ops = insn->ops; IR_REGSET_EXCL(available, reg); - if (constraints.tmp_regs[n].num > 0 - && IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) { - /* rematerialization */ - reg |= IR_REG_SPILL_LOAD; + if (constraints.tmp_regs[n].num > 0) { + if (IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) { + /* rematerialization */ + reg |= IR_REG_SPILL_LOAD; + } else if (ctx->ir_base[ops[constraints.tmp_regs[n].num]].op == IR_ALLOCA || + ctx->ir_base[ops[constraints.tmp_regs[n].num]].op == IR_VADDR) { + /* local address rematerialization */ + reg |= IR_REG_SPILL_LOAD; + } } ctx->regs[i][constraints.tmp_regs[n].num] = reg; } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) {
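
For illustration only, not part of the patch: the two ir_load_local_addr() hunks above (x86 and aarch64) apply the same fix -- an IR_VADDR operand is first resolved to the IR_VAR/IR_ALLOCA it refers to, and only that instruction's op3 is fed to IR_SPILL_POS_TO_OFFSET. The sketch below is a minimal standalone C rendering of that resolution step; insn_t and local_addr_offset() are invented simplifications of the real ir_insn/ir_ctx machinery, and the macro/opcode names are taken from the diff.

/*
 * Sketch of the VADDR-resolution pattern from ir_load_local_addr().
 * Simplified stand-in types; not the real IR data structures.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

enum { IR_VAR = 1, IR_ALLOCA = 2, IR_VADDR = 3 };

typedef struct {
    int     op;   /* opcode: IR_VAR, IR_ALLOCA or IR_VADDR */
    int32_t op1;  /* for IR_VADDR: reference to the underlying variable */
    int32_t op3;  /* for IR_VAR/IR_ALLOCA: spill position */
} insn_t;

/* Resolve a possible IR_VADDR indirection before reading the offset,
 * exactly as the patched backends now do. */
static int32_t local_addr_offset(const insn_t *ir_base, int32_t src)
{
    const insn_t *var_insn = &ir_base[src];

    if (var_insn->op == IR_VADDR) {
        var_insn = &ir_base[var_insn->op1];
    }
    assert(var_insn->op == IR_VAR || var_insn->op == IR_ALLOCA);
    return var_insn->op3; /* the real code applies IR_SPILL_POS_TO_OFFSET here */
}

int main(void)
{
    insn_t ir_base[3] = {
        {0,         0, 0},
        {IR_ALLOCA, 0, 16}, /* local slot at spill position 16 */
        {IR_VADDR,  1, 0},  /* address of instruction 1 */
    };

    /* Both the direct reference and the VADDR reference yield the
     * same frame offset; prints "16 16". */
    printf("%d %d\n",
           local_addr_offset(ir_base, 1),
           local_addr_offset(ir_base, 2));
    return 0;
}

Relatedly, once the resolved offset is known to be zero, the ir_emit_vaddr() hunk replaces the "lea reg, [base+0]" with a plain "mov reg, base", matching the offset == 0 fast path that ir_load_local_addr() on x86 already had.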