diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index 2aa6d02a67f9..16808b9d7162 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -2544,8 +2544,7 @@ pub const Const = struct { const bits_per_limb = @bitSizeOf(Limb); while (i != 0) { i -= 1; - const limb = a.limbs[i]; - const this_limb_lz = @clz(limb); + const this_limb_lz = @clz(a.limbs[i]); total_limb_lz += this_limb_lz; if (this_limb_lz != bits_per_limb) break; } @@ -2557,6 +2556,7 @@ pub const Const = struct { pub fn ctz(a: Const, bits: Limb) Limb { // Limbs are stored in little-endian order. Converting a negative number to twos-complement // flips all bits above the lowest set bit, which does not affect the trailing zero count. + if (a.eqlZero()) return bits; var result: Limb = 0; for (a.limbs) |limb| { const limb_tz = @ctz(limb); diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig index 811cf98d7387..3118e860b990 100644 --- a/lib/std/math/big/int_test.zig +++ b/lib/std/math/big/int_test.zig @@ -3332,3 +3332,227 @@ test "(BigInt) negative" { try testing.expect(mem.eql(u8, a_fmt, "(BigInt)")); try testing.expect(!mem.eql(u8, b_fmt, "(BigInt)")); } + +test "clz" { + const neg_limb_max_squared: std.math.big.int.Const = .{ + .limbs = &.{ 1, maxInt(Limb) - 1 }, + .positive = false, + }; + try testing.expect(neg_limb_max_squared.clz(@bitSizeOf(Limb) * 2 + 1) == 0); + + const neg_limb_max_squared_plus_one: std.math.big.int.Const = .{ + .limbs = &.{ 0, maxInt(Limb) - 1 }, + .positive = false, + }; + try testing.expect(neg_limb_max_squared_plus_one.clz(@bitSizeOf(Limb) * 2 + 1) == 0); + + const neg_limb_msb_squared: std.math.big.int.Const = .{ + .limbs = &.{ 0, 1 << @bitSizeOf(Limb) - 2 }, + .positive = false, + }; + try testing.expect(neg_limb_msb_squared.clz(@bitSizeOf(Limb) * 2) == 0); + try testing.expect(neg_limb_msb_squared.clz(@bitSizeOf(Limb) * 2 + 1) == 0); + + const neg_limb_max: std.math.big.int.Const = .{ + .limbs = &.{maxInt(Limb)}, + .positive = false, + }; + try testing.expect(neg_limb_max.clz(@bitSizeOf(Limb) + 1) == 0); + try testing.expect(neg_limb_max.clz(@bitSizeOf(Limb) * 2 - 1) == 0); + try testing.expect(neg_limb_max.clz(@bitSizeOf(Limb) * 2) == 0); + try testing.expect(neg_limb_max.clz(@bitSizeOf(Limb) * 2 + 1) == 0); + + const neg_limb_msb: std.math.big.int.Const = .{ + .limbs = &.{1 << @bitSizeOf(Limb) - 1}, + .positive = false, + }; + try testing.expect(neg_limb_msb.clz(@bitSizeOf(Limb)) == 0); + try testing.expect(neg_limb_msb.clz(@bitSizeOf(Limb) + 1) == 0); + try testing.expect(neg_limb_msb.clz(@bitSizeOf(Limb) * 2 - 1) == 0); + try testing.expect(neg_limb_msb.clz(@bitSizeOf(Limb) * 2) == 0); + try testing.expect(neg_limb_msb.clz(@bitSizeOf(Limb) * 2 + 1) == 0); + + const neg_one: std.math.big.int.Const = .{ + .limbs = &.{1}, + .positive = false, + }; + try testing.expect(neg_one.clz(@bitSizeOf(Limb)) == 0); + try testing.expect(neg_one.clz(@bitSizeOf(Limb) + 1) == 0); + try testing.expect(neg_one.clz(@bitSizeOf(Limb) * 2 - 1) == 0); + try testing.expect(neg_one.clz(@bitSizeOf(Limb) * 2) == 0); + try testing.expect(neg_one.clz(@bitSizeOf(Limb) * 2 + 1) == 0); + + const zero: std.math.big.int.Const = .{ + .limbs = &.{0}, + .positive = true, + }; + try testing.expect(zero.clz(@bitSizeOf(Limb)) == @bitSizeOf(Limb)); + try testing.expect(zero.clz(@bitSizeOf(Limb) + 1) == @bitSizeOf(Limb) + 1); + try testing.expect(zero.clz(@bitSizeOf(Limb) * 2 - 1) == @bitSizeOf(Limb) * 2 - 1); + try testing.expect(zero.clz(@bitSizeOf(Limb) * 2) == @bitSizeOf(Limb) * 2); + try testing.expect(zero.clz(@bitSizeOf(Limb) * 2 + 1) == @bitSizeOf(Limb) * 2 + 1); + + const one: std.math.big.int.Const = .{ + .limbs = &.{1}, + .positive = true, + }; + try testing.expect(one.clz(@bitSizeOf(Limb)) == @bitSizeOf(Limb) - 1); + try testing.expect(one.clz(@bitSizeOf(Limb) + 1) == @bitSizeOf(Limb)); + try testing.expect(one.clz(@bitSizeOf(Limb) * 2 - 1) == @bitSizeOf(Limb) * 2 - 2); + try testing.expect(one.clz(@bitSizeOf(Limb) * 2) == @bitSizeOf(Limb) * 2 - 1); + try testing.expect(one.clz(@bitSizeOf(Limb) * 2 + 1) == @bitSizeOf(Limb) * 2); + + const limb_msb: std.math.big.int.Const = .{ + .limbs = &.{1 << @bitSizeOf(Limb) - 1}, + .positive = true, + }; + try testing.expect(limb_msb.clz(@bitSizeOf(Limb)) == 0); + try testing.expect(limb_msb.clz(@bitSizeOf(Limb) + 1) == 1); + try testing.expect(limb_msb.clz(@bitSizeOf(Limb) * 2 - 1) == @bitSizeOf(Limb) - 1); + try testing.expect(limb_msb.clz(@bitSizeOf(Limb) * 2) == @bitSizeOf(Limb)); + try testing.expect(limb_msb.clz(@bitSizeOf(Limb) * 2 + 1) == @bitSizeOf(Limb) + 1); + + const limb_max: std.math.big.int.Const = .{ + .limbs = &.{maxInt(Limb)}, + .positive = true, + }; + try testing.expect(limb_max.clz(@bitSizeOf(Limb)) == 0); + try testing.expect(limb_max.clz(@bitSizeOf(Limb) + 1) == 1); + try testing.expect(limb_max.clz(@bitSizeOf(Limb) * 2 - 1) == @bitSizeOf(Limb) - 1); + try testing.expect(limb_max.clz(@bitSizeOf(Limb) * 2) == @bitSizeOf(Limb)); + try testing.expect(limb_max.clz(@bitSizeOf(Limb) * 2 + 1) == @bitSizeOf(Limb) + 1); + + const limb_msb_squared: std.math.big.int.Const = .{ + .limbs = &.{ 0, 1 << @bitSizeOf(Limb) - 2 }, + .positive = true, + }; + try testing.expect(limb_msb_squared.clz(@bitSizeOf(Limb) * 2 - 1) == 0); + try testing.expect(limb_msb_squared.clz(@bitSizeOf(Limb) * 2) == 1); + try testing.expect(limb_msb_squared.clz(@bitSizeOf(Limb) * 2 + 1) == 2); + + const limb_max_squared_minus_one: std.math.big.int.Const = .{ + .limbs = &.{ 0, maxInt(Limb) - 1 }, + .positive = true, + }; + try testing.expect(limb_max_squared_minus_one.clz(@bitSizeOf(Limb) * 2) == 0); + try testing.expect(limb_max_squared_minus_one.clz(@bitSizeOf(Limb) * 2 + 1) == 1); + + const limb_max_squared: std.math.big.int.Const = .{ + .limbs = &.{ 1, maxInt(Limb) - 1 }, + .positive = true, + }; + try testing.expect(limb_max_squared.clz(@bitSizeOf(Limb) * 2) == 0); + try testing.expect(limb_max_squared.clz(@bitSizeOf(Limb) * 2 + 1) == 1); +} + +test "ctz" { + const neg_limb_max_squared: std.math.big.int.Const = .{ + .limbs = &.{ 1, maxInt(Limb) - 1 }, + .positive = false, + }; + try testing.expect(neg_limb_max_squared.ctz(@bitSizeOf(Limb) * 2 + 1) == 0); + + const neg_limb_max_squared_plus_one: std.math.big.int.Const = .{ + .limbs = &.{ 0, maxInt(Limb) - 1 }, + .positive = false, + }; + try testing.expect(neg_limb_max_squared_plus_one.ctz(@bitSizeOf(Limb) * 2 + 1) == @bitSizeOf(Limb) + 1); + + const neg_limb_msb_squared: std.math.big.int.Const = .{ + .limbs = &.{ 0, 1 << @bitSizeOf(Limb) - 2 }, + .positive = false, + }; + try testing.expect(neg_limb_msb_squared.ctz(@bitSizeOf(Limb) * 2) == @bitSizeOf(Limb) * 2 - 2); + try testing.expect(neg_limb_msb_squared.ctz(@bitSizeOf(Limb) * 2 + 1) == @bitSizeOf(Limb) * 2 - 2); + + const neg_limb_max: std.math.big.int.Const = .{ + .limbs = &.{maxInt(Limb)}, + .positive = false, + }; + try testing.expect(neg_limb_max.ctz(@bitSizeOf(Limb) + 1) == 0); + try testing.expect(neg_limb_max.ctz(@bitSizeOf(Limb) * 2 - 1) == 0); + try testing.expect(neg_limb_max.ctz(@bitSizeOf(Limb) * 2) == 0); + try testing.expect(neg_limb_max.ctz(@bitSizeOf(Limb) * 2 + 1) == 0); + + const neg_limb_msb: std.math.big.int.Const = .{ + .limbs = &.{1 << @bitSizeOf(Limb) - 1}, + .positive = false, + }; + try testing.expect(neg_limb_msb.ctz(@bitSizeOf(Limb)) == @bitSizeOf(Limb) - 1); + try testing.expect(neg_limb_msb.ctz(@bitSizeOf(Limb) + 1) == @bitSizeOf(Limb) - 1); + try testing.expect(neg_limb_msb.ctz(@bitSizeOf(Limb) * 2 - 1) == @bitSizeOf(Limb) - 1); + try testing.expect(neg_limb_msb.ctz(@bitSizeOf(Limb) * 2) == @bitSizeOf(Limb) - 1); + try testing.expect(neg_limb_msb.ctz(@bitSizeOf(Limb) * 2 + 1) == @bitSizeOf(Limb) - 1); + + const neg_one: std.math.big.int.Const = .{ + .limbs = &.{1}, + .positive = false, + }; + try testing.expect(neg_one.ctz(@bitSizeOf(Limb)) == 0); + try testing.expect(neg_one.ctz(@bitSizeOf(Limb) + 1) == 0); + try testing.expect(neg_one.ctz(@bitSizeOf(Limb) * 2 - 1) == 0); + try testing.expect(neg_one.ctz(@bitSizeOf(Limb) * 2) == 0); + try testing.expect(neg_one.ctz(@bitSizeOf(Limb) * 2 + 1) == 0); + + const zero: std.math.big.int.Const = .{ + .limbs = &.{0}, + .positive = true, + }; + try testing.expect(zero.ctz(@bitSizeOf(Limb)) == @bitSizeOf(Limb)); + try testing.expect(zero.ctz(@bitSizeOf(Limb) + 1) == @bitSizeOf(Limb) + 1); + try testing.expect(zero.ctz(@bitSizeOf(Limb) * 2 - 1) == @bitSizeOf(Limb) * 2 - 1); + try testing.expect(zero.ctz(@bitSizeOf(Limb) * 2) == @bitSizeOf(Limb) * 2); + try testing.expect(zero.ctz(@bitSizeOf(Limb) * 2 + 1) == @bitSizeOf(Limb) * 2 + 1); + + const one: std.math.big.int.Const = .{ + .limbs = &.{1}, + .positive = true, + }; + try testing.expect(one.ctz(@bitSizeOf(Limb)) == 0); + try testing.expect(one.ctz(@bitSizeOf(Limb) + 1) == 0); + try testing.expect(one.ctz(@bitSizeOf(Limb) * 2 - 1) == 0); + try testing.expect(one.ctz(@bitSizeOf(Limb) * 2) == 0); + try testing.expect(one.ctz(@bitSizeOf(Limb) * 2 + 1) == 0); + + const limb_msb: std.math.big.int.Const = .{ + .limbs = &.{1 << @bitSizeOf(Limb) - 1}, + .positive = true, + }; + try testing.expect(limb_msb.ctz(@bitSizeOf(Limb)) == @bitSizeOf(Limb) - 1); + try testing.expect(limb_msb.ctz(@bitSizeOf(Limb) + 1) == @bitSizeOf(Limb) - 1); + try testing.expect(limb_msb.ctz(@bitSizeOf(Limb) * 2 - 1) == @bitSizeOf(Limb) - 1); + try testing.expect(limb_msb.ctz(@bitSizeOf(Limb) * 2) == @bitSizeOf(Limb) - 1); + try testing.expect(limb_msb.ctz(@bitSizeOf(Limb) * 2 + 1) == @bitSizeOf(Limb) - 1); + + const limb_max: std.math.big.int.Const = .{ + .limbs = &.{maxInt(Limb)}, + .positive = true, + }; + try testing.expect(limb_max.ctz(@bitSizeOf(Limb)) == 0); + try testing.expect(limb_max.ctz(@bitSizeOf(Limb) + 1) == 0); + try testing.expect(limb_max.ctz(@bitSizeOf(Limb) * 2 - 1) == 0); + try testing.expect(limb_max.ctz(@bitSizeOf(Limb) * 2) == 0); + try testing.expect(limb_max.ctz(@bitSizeOf(Limb) * 2 + 1) == 0); + + const limb_msb_squared: std.math.big.int.Const = .{ + .limbs = &.{ 0, 1 << @bitSizeOf(Limb) - 2 }, + .positive = true, + }; + try testing.expect(limb_msb_squared.ctz(@bitSizeOf(Limb) * 2 - 1) == @bitSizeOf(Limb) * 2 - 2); + try testing.expect(limb_msb_squared.ctz(@bitSizeOf(Limb) * 2) == @bitSizeOf(Limb) * 2 - 2); + try testing.expect(limb_msb_squared.ctz(@bitSizeOf(Limb) * 2 + 1) == @bitSizeOf(Limb) * 2 - 2); + + const limb_max_squared_minus_one: std.math.big.int.Const = .{ + .limbs = &.{ 0, maxInt(Limb) - 1 }, + .positive = true, + }; + try testing.expect(limb_max_squared_minus_one.ctz(@bitSizeOf(Limb) * 2) == @bitSizeOf(Limb) + 1); + try testing.expect(limb_max_squared_minus_one.ctz(@bitSizeOf(Limb) * 2 + 1) == @bitSizeOf(Limb) + 1); + + const limb_max_squared: std.math.big.int.Const = .{ + .limbs = &.{ 1, maxInt(Limb) - 1 }, + .positive = true, + }; + try testing.expect(limb_max_squared.ctz(@bitSizeOf(Limb) * 2) == 0); + try testing.expect(limb_max_squared.ctz(@bitSizeOf(Limb) * 2 + 1) == 0); +} diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 203f18585493..107350fc95f8 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2418,7 +2418,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { - @setEvalBranchQuota(12_600); + @setEvalBranchQuota(12_900); const pt = cg.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; @@ -2475,8 +2475,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .bitcast => try cg.airBitCast(inst), - .ctz => try cg.airCtz(inst), - .popcount => try cg.airPopCount(inst), .bit_reverse => try cg.airBitReverse(inst), .splat => try cg.airSplat(inst), .select => try cg.airSelect(inst), @@ -26978,7 +26976,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .call_always_tail => try cg.airCall(inst, .always_tail, .{ .safety = true }), .call_never_tail => try cg.airCall(inst, .never_tail, .{ .safety = true }), .call_never_inline => try cg.airCall(inst, .never_inline, .{ .safety = true }), - .clz => |air_tag| if (use_old) try cg.airClz(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); @@ -30084,6 +30081,1293 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }; try res[0].finish(inst, &.{ty_op.operand}, &ops, cg); }, + .ctz => |air_tag| if (use_old) try cg.airCtz(inst) else fallback: { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + if (ty_op.ty.toType().isVector(zcu)) break :fallback try cg.airCtz(inst); + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + var res: [1]Temp = undefined; + cg.select(&res, &.{ty_op.ty.toType()}, &ops, comptime &.{ .{ + .required_features = .{ .slow_incdec, null, null, null }, + .src_constraints = .{ .{ .exact_signed_int = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .add, .dst0b, .si(1), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .exact_signed_int = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._c, .in, .dst0b, ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .exact_unsigned_int = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .si(1), ._, ._ }, + } }, + }, .{ + .required_features = .{ .bmi, null, null, null }, + .src_constraints = .{ .{ .exact_int = 16 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .tzcnt, .dst0w, .src0w, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bmi, .false_deps_lzcnt_tzcnt, null, null }, + .src_constraints = .{ .{ .exact_int = 32 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .tzcnt, .dst0d, .src0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bmi, null, null, null }, + .src_constraints = .{ .{ .exact_int = 32 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .tzcnt, .dst0d, .src0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bmi, null, null, null }, + .src_constraints = .{ .{ .int = .dword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"or", .src0d, .uia(1, .src0, .add_umax), ._, ._ }, + .{ ._, ._, .tzcnt, .dst0d, .src0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bmi, .false_deps_lzcnt_tzcnt, null }, + .src_constraints = .{ .{ .exact_int = 64 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .tzcnt, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bmi, null, null }, + .src_constraints = .{ .{ .exact_int = 64 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .tzcnt, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bmi, null, null }, + .src_constraints = .{ .{ .int = .qword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._s, .bt, .src0q, .ua(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .tzcnt, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .exact_int = 32 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._f, .bs, .dst0d, .src0d, ._, ._ }, + .{ ._, ._z, .cmov, .dst0d, .tmp0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .exact_int = 32 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._f, .bs, .dst0d, .src0d, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(32), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .exact_int = 16 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0d, .si(16), ._, ._ }, + .{ ._, ._f, .bs, .dst0w, .src0w, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .exact_int = 32 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0d, .si(32), ._, ._ }, + .{ ._, ._f, .bs, .dst0d, .src0d, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .int = .dword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"or", .src0d, .uia(1, .src0, .add_umax), ._, ._ }, + .{ ._, ._f, .bs, .dst0d, .src0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null }, + .src_constraints = .{ .{ .exact_int = 64 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .si(64), ._, ._ }, + .{ ._, ._f, .bs, .dst0q, .src0q, ._, ._ }, + .{ ._, ._z, .cmov, .dst0d, .tmp0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .exact_int = 64 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._f, .bs, .dst0q, .src0q, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(64), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .exact_int = 64 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0d, .si(64), ._, ._ }, + .{ ._, ._f, .bs, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .int = .qword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._s, .bt, .src0q, .ua(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._f, .bs, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bmi, .false_deps_lzcnt_tzcnt, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .tzcnt, .dst0q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .sa(.src0, .add_bit_size_sub_8_size), ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasia(.dst0, .@"8", .tmp0, .add_8_src0_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bmi, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .tzcnt, .dst0q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .sa(.src0, .add_bit_size_sub_8_size), ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasia(.dst0, .@"8", .tmp0, .add_8_src0_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._f, .bs, .dst0q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .sa(.src0, .add_bit_size_sub_8_size), ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasia(.dst0, .@"8", .tmp0, .add_8_src0_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._f, .bs, .dst0q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .sa(.src0, .add_bit_size_sub_8_size), ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasia(.dst0, .@"8", .tmp0, .add_8_src0_size), ._, ._ }, + } }, + } }) catch |err| switch (err) { + error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{ + @tagName(air_tag), + cg.typeOf(ty_op.operand).fmt(pt), + ops[0].tracking(cg), + }), + else => |e| return e, + }; + try res[0].finish(inst, &.{ty_op.operand}, &ops, cg); + }, + .popcount => |air_tag| if (use_old) try cg.airPopCount(inst) else fallback: { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + if (ty_op.ty.toType().isVector(zcu)) break :fallback try cg.airPopCount(inst); + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + var res: [1]Temp = undefined; + cg.select(&res, &.{ty_op.ty.toType()}, &ops, comptime &.{ .{ + .src_constraints = .{ .{ .exact_signed_int = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .neg, .dst0b, ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .exact_unsigned_int = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{} }, + }, .{ + .required_features = .{ .popcnt, null, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_int = .byte }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._, .popcnt, .dst0d, .dst0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .popcnt, null, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_int = .word }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .popcnt, .dst0w, .src0w, ._, ._ }, + } }, + }, .{ + .required_features = .{ .popcnt, null, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_int = .dword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .popcnt, .dst0d, .src0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .popcnt, .false_deps_popcnt, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_int = .dword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .popcnt, .dst0d, .src0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .popcnt, null, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_int = .dword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .popcnt, .dst0d, .src0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .popcnt, null, null, null }, + .src_constraints = .{ .{ .signed_int = .dword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._l, .sh, .src0d, .uia(32, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .popcnt, .dst0d, .src0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .popcnt, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_int = .qword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .popcnt, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .popcnt, .false_deps_popcnt, null }, + .src_constraints = .{ .{ .unsigned_or_exact_int = .qword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .popcnt, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .popcnt, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_int = .qword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none, .none } }, + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .popcnt, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .popcnt, null, null }, + .src_constraints = .{ .{ .signed_int = .qword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._l, .sh, .src0q, .uia(64, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .popcnt, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_or_exact_int = .byte }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._r, .sh, .src0b, .ui(1), ._, ._ }, + .{ ._, ._, .@"and", .src0b, .ui(0x55), ._, ._ }, + .{ ._, ._, .sub, .tmp0b, .src0b, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sh, .tmp0d, .ui(2), ._, ._ }, + .{ ._, ._, .@"and", .dst0b, .ui(0x33), ._, ._ }, + .{ ._, ._, .@"and", .tmp0b, .ui(0x33), ._, ._ }, + .{ ._, ._, .add, .dst0b, .tmp0b, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._r, .sh, .dst0b, .ui(4), ._, ._ }, + .{ ._, ._, .@"and", .tmp0b, .ui(0x0f), ._, ._ }, + .{ ._, ._, .add, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_int = .byte }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._l, .sh, .src0b, .uia(8, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._r, .sh, .src0b, .ui(1), ._, ._ }, + .{ ._, ._, .@"and", .src0b, .ui(0x55), ._, ._ }, + .{ ._, ._, .sub, .tmp0b, .src0b, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sh, .tmp0d, .ui(2), ._, ._ }, + .{ ._, ._, .@"and", .dst0b, .ui(0x33), ._, ._ }, + .{ ._, ._, .@"and", .tmp0b, .ui(0x33), ._, ._ }, + .{ ._, ._, .add, .dst0b, .tmp0b, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._r, .sh, .dst0b, .ui(4), ._, ._ }, + .{ ._, ._, .@"and", .tmp0b, .ui(0x0f), ._, ._ }, + .{ ._, ._, .add, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .fast_imm16, null, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_int = .word }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._r, .sh, .src0d, .ui(1), ._, ._ }, + .{ ._, ._, .@"and", .src0w, .ui(0x5555), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sh, .tmp0d, .ui(2), ._, ._ }, + .{ ._, ._, .@"and", .dst0w, .ui(0x3333), ._, ._ }, + .{ ._, ._, .@"and", .tmp0w, .ui(0x3333), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp0), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._r, .sh, .dst0d, .ui(4), ._, ._ }, + .{ ._, ._, .@"and", .tmp0w, .ui(0x0f0f), ._, ._ }, + .{ ._, ._, .@"and", .dst0w, .ui(0x0f0f), ._, ._ }, + .{ ._, ._, .lea, .tmp0d, .leai(.tmp0, .dst0), ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sh, .tmp0d, .ui(8), ._, ._ }, + .{ ._, ._, .add, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .fast_imm16, null, null, null }, + .src_constraints = .{ .{ .signed_int = .word }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._l, .sh, .src0d, .uia(16, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._r, .sh, .src0d, .ui(1), ._, ._ }, + .{ ._, ._, .@"and", .src0w, .ui(0x5555), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sh, .tmp0d, .ui(2), ._, ._ }, + .{ ._, ._, .@"and", .dst0w, .ui(0x3333), ._, ._ }, + .{ ._, ._, .@"and", .tmp0w, .ui(0x3333), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp0), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._r, .sh, .dst0d, .ui(4), ._, ._ }, + .{ ._, ._, .@"and", .tmp0w, .ui(0x0f0f), ._, ._ }, + .{ ._, ._, .@"and", .dst0w, .ui(0x0f0f), ._, ._ }, + .{ ._, ._, .lea, .tmp0d, .leai(.tmp0, .dst0), ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sh, .tmp0d, .ui(8), ._, ._ }, + .{ ._, ._, .add, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_or_exact_int = .word }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._r, .sh, .src0d, .ui(1), ._, ._ }, + .{ ._, ._, .@"and", .src0d, .ui(0x5555), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sh, .tmp0d, .ui(2), ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .ui(0x3333), ._, ._ }, + .{ ._, ._, .@"and", .tmp0d, .ui(0x3333), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp0), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._r, .sh, .dst0d, .ui(4), ._, ._ }, + .{ ._, ._, .@"and", .tmp0d, .ui(0x0f0f), ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .ui(0x0f0f), ._, ._ }, + .{ ._, ._, .lea, .tmp0d, .leai(.tmp0, .dst0), ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sh, .tmp0d, .ui(8), ._, ._ }, + .{ ._, ._, .add, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_int = .word }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._l, .sh, .src0d, .uia(16, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._r, .sh, .src0d, .ui(1), ._, ._ }, + .{ ._, ._, .@"and", .src0d, .ui(0x5555), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sh, .tmp0d, .ui(2), ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .ui(0x3333), ._, ._ }, + .{ ._, ._, .@"and", .tmp0d, .ui(0x3333), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp0), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._r, .sh, .dst0d, .ui(4), ._, ._ }, + .{ ._, ._, .@"and", .tmp0d, .ui(0x0f0f), ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .ui(0x0f0f), ._, ._ }, + .{ ._, ._, .lea, .tmp0d, .leai(.tmp0, .dst0), ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sh, .tmp0d, .ui(8), ._, ._ }, + .{ ._, ._, .add, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_or_exact_int = .dword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._r, .sh, .src0d, .ui(1), ._, ._ }, + .{ ._, ._, .@"and", .src0d, .ui(0x55555555), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sh, .tmp0d, .ui(2), ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .ui(0x33333333), ._, ._ }, + .{ ._, ._, .@"and", .tmp0d, .ui(0x33333333), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp0), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._r, .sh, .dst0d, .ui(4), ._, ._ }, + .{ ._, ._, .@"and", .tmp0d, .ui(0x0f0f0f0f), ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .ui(0x0f0f0f0f), ._, ._ }, + .{ ._, ._, .lea, .tmp0d, .leai(.tmp0, .dst0), ._, ._ }, + .{ ._, .i_, .mul, .dst0d, .tmp0d, .ui(0x01010101), ._ }, + .{ ._, ._r, .sh, .dst0d, .ui(32 - 8), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_int = .dword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._l, .sh, .src0d, .uia(32, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._r, .sh, .src0d, .ui(1), ._, ._ }, + .{ ._, ._, .@"and", .src0d, .ui(0x55555555), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sh, .tmp0d, .ui(2), ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .ui(0x33333333), ._, ._ }, + .{ ._, ._, .@"and", .tmp0d, .ui(0x33333333), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp0), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._r, .sh, .dst0d, .ui(4), ._, ._ }, + .{ ._, ._, .@"and", .tmp0d, .ui(0x0f0f0f0f), ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .ui(0x0f0f0f0f), ._, ._ }, + .{ ._, ._, .lea, .tmp0d, .leai(.tmp0, .dst0), ._, ._ }, + .{ ._, .i_, .mul, .dst0d, .tmp0d, .ui(0x01010101), ._ }, + .{ ._, ._r, .sh, .dst0d, .ui(32 - 8), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_int = .qword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ }, + .{ ._, ._r, .sh, .src0q, .ui(1), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .uia(0x55555555, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .src0q, .tmp1q, ._, ._ }, + .{ ._, ._, .sub, .tmp0q, .src0q, ._, ._ }, + .{ ._, ._, .mov, .dst0q, .tmp0q, ._, ._ }, + .{ ._, ._r, .sh, .tmp0q, .ui(2), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .uia(0x33333333, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .dst0q, .tmp1q, ._, ._ }, + .{ ._, ._, .@"and", .tmp0q, .tmp1q, ._, ._ }, + .{ ._, ._, .lea, .dst0q, .leai(.dst0, .tmp0), ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .dst0q, ._, ._ }, + .{ ._, ._r, .sh, .dst0q, .ui(4), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .uia(0x0f0f0f0f, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .tmp0q, .tmp1q, ._, ._ }, + .{ ._, ._, .@"and", .dst0q, .tmp1q, ._, ._ }, + .{ ._, ._, .lea, .dst0q, .leai(.dst0, .tmp0), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .uia(0x01010101, .none, .repeat), ._, ._ }, + .{ ._, .i_, .mul, .dst0q, .tmp1q, ._, ._ }, + .{ ._, ._r, .sh, .dst0q, .ui(64 - 8), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .signed_int = .qword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._l, .sh, .src0q, .uia(64, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ }, + .{ ._, ._r, .sh, .src0q, .ui(1), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .uia(0x55555555, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .src0q, .tmp1q, ._, ._ }, + .{ ._, ._, .sub, .tmp0q, .src0q, ._, ._ }, + .{ ._, ._, .mov, .dst0q, .tmp0q, ._, ._ }, + .{ ._, ._r, .sh, .tmp0q, .ui(2), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .uia(0x33333333, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .dst0q, .tmp1q, ._, ._ }, + .{ ._, ._, .@"and", .tmp0q, .tmp1q, ._, ._ }, + .{ ._, ._, .lea, .dst0q, .leai(.dst0, .tmp0), ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .dst0q, ._, ._ }, + .{ ._, ._r, .sh, .dst0q, .ui(4), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .uia(0x0f0f0f0f, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .tmp0q, .tmp1q, ._, ._ }, + .{ ._, ._, .@"and", .dst0q, .tmp1q, ._, ._ }, + .{ ._, ._, .lea, .dst0q, .leai(.dst0, .tmp0), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .uia(0x01010101, .none, .repeat), ._, ._ }, + .{ ._, .i_, .mul, .dst0q, .tmp1q, ._, ._ }, + .{ ._, ._r, .sh, .dst0q, .ui(64 - 8), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .popcnt, .false_deps_popcnt, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .popcnt, .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .popcnt, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ .@"0:", ._, .popcnt, .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .popcnt, .false_deps_popcnt, null }, + .src_constraints = .{ .{ .remainder_signed_int = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0q, .memad(.src0q, .add_size, -16), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-24, .src0, .add_size), ._, ._ }, + .{ ._, ._l, .sh, .dst0q, .uia(64, .src0, .sub_bit_size_rem_64), ._, ._ }, + .{ ._, ._, .popcnt, .dst0q, .dst0q, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .popcnt, .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .popcnt, null, null }, + .src_constraints = .{ .{ .remainder_signed_int = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0q, .memad(.src0q, .add_size, -16), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-24, .src0, .add_size), ._, ._ }, + .{ ._, ._l, .sh, .dst0q, .uia(64, .src0, .sub_bit_size_rem_64), ._, ._ }, + .{ ._, ._, .popcnt, .dst0q, .dst0q, ._, ._ }, + .{ .@"0:", ._, .popcnt, .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .popcnt, .false_deps_popcnt, null }, + .src_constraints = .{ .{ .remainder_signed_int = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0q, .memad(.src0q, .add_size, -8), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ ._, ._l, .sh, .dst0q, .uia(64, .src0, .sub_bit_size_rem_64), ._, ._ }, + .{ ._, ._, .popcnt, .dst0q, .dst0q, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .popcnt, .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .popcnt, null, null }, + .src_constraints = .{ .{ .remainder_signed_int = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0q, .memad(.src0q, .add_size, -8), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ ._, ._l, .sh, .dst0q, .uia(64, .src0, .sub_bit_size_rem_64), ._, ._ }, + .{ ._, ._, .popcnt, .dst0q, .dst0q, ._, ._ }, + .{ .@"0:", ._, .popcnt, .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .mov, .tmp2q, .tmp1q, ._, ._ }, + .{ ._, ._r, .sh, .tmp1q, .ui(1), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .uia(0x55555555, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .tmp3q, ._, ._ }, + .{ ._, ._, .sub, .tmp2q, .tmp1q, ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .tmp2q, ._, ._ }, + .{ ._, ._r, .sh, .tmp2q, .ui(2), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .uia(0x33333333, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .tmp3q, ._, ._ }, + .{ ._, ._, .@"and", .tmp2q, .tmp3q, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leai(.tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .mov, .tmp2q, .tmp1q, ._, ._ }, + .{ ._, ._r, .sh, .tmp1q, .ui(4), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .uia(0x0f0f0f0f, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .tmp2q, .tmp3q, ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .tmp3q, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leai(.tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .uia(0x01010101, .none, .repeat), ._, ._ }, + .{ ._, .i_, .mul, .tmp1q, .tmp3q, ._, ._ }, + .{ ._, ._r, .sh, .tmp1q, .ui(64 - 8), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_signed_int = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp1q, .memad(.src0q, .add_size, -16), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ ._, ._l, .sh, .tmp1q, .uia(64, .src0, .sub_bit_size_rem_64), ._, ._ }, + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._mp, .j, .@"1f", ._, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ .@"1:", ._, .mov, .tmp2q, .tmp1q, ._, ._ }, + .{ ._, ._r, .sh, .tmp1q, .ui(1), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .uia(0x55555555, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .tmp3q, ._, ._ }, + .{ ._, ._, .sub, .tmp2q, .tmp1q, ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .tmp2q, ._, ._ }, + .{ ._, ._r, .sh, .tmp2q, .ui(2), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .uia(0x33333333, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .tmp3q, ._, ._ }, + .{ ._, ._, .@"and", .tmp2q, .tmp3q, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leai(.tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .mov, .tmp2q, .tmp1q, ._, ._ }, + .{ ._, ._r, .sh, .tmp1q, .ui(4), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .uia(0x0f0f0f0f, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .tmp2q, .tmp3q, ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .tmp3q, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leai(.tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .uia(0x01010101, .none, .repeat), ._, ._ }, + .{ ._, .i_, .mul, .tmp1q, .tmp3q, ._, ._ }, + .{ ._, ._r, .sh, .tmp1q, .ui(64 - 8), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_signed_int = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp1q, .memad(.src0q, .add_size, -8), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ ._, ._l, .sh, .tmp1q, .uia(64, .src0, .sub_bit_size_rem_64), ._, ._ }, + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._mp, .j, .@"1f", ._, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ .@"1:", ._, .mov, .tmp2q, .tmp1q, ._, ._ }, + .{ ._, ._r, .sh, .tmp1q, .ui(1), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .uia(0x55555555, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .tmp3q, ._, ._ }, + .{ ._, ._, .sub, .tmp2q, .tmp1q, ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .tmp2q, ._, ._ }, + .{ ._, ._r, .sh, .tmp2q, .ui(2), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .uia(0x33333333, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .tmp3q, ._, ._ }, + .{ ._, ._, .@"and", .tmp2q, .tmp3q, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leai(.tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .mov, .tmp2q, .tmp1q, ._, ._ }, + .{ ._, ._r, .sh, .tmp1q, .ui(4), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .uia(0x0f0f0f0f, .none, .repeat), ._, ._ }, + .{ ._, ._, .@"and", .tmp2q, .tmp3q, ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .tmp3q, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leai(.tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .uia(0x01010101, .none, .repeat), ._, ._ }, + .{ ._, .i_, .mul, .tmp1q, .tmp3q, ._, ._ }, + .{ ._, ._r, .sh, .tmp1q, .ui(64 - 8), ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leai(.dst0, .tmp1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + } }) catch |err| switch (err) { + error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{ + @tagName(air_tag), + cg.typeOf(ty_op.operand).fmt(pt), + ops[0].tracking(cg), + }), + else => |e| return e, + }; + try res[0].finish(inst, &.{ty_op.operand}, &ops, cg); + }, .byte_swap => |air_tag| if (use_old) try cg.airByteSwap(inst) else fallback: { const ty_op = air_datas[@intFromEnum(inst)].ty_op; if (ty_op.ty.toType().isVector(zcu)) break :fallback try cg.airByteSwap(inst); @@ -99931,6 +101215,7 @@ const Select = struct { unaligned_size, bit_size, src0_bit_size, + @"8_size_sub_bit_size", len, elem_limbs, elem_size, @@ -99942,6 +101227,7 @@ const Select = struct { smin, smax, umax, + repeat, }, op: enum(u2) { mul, div, div_8_down, rem_8_mul }, rhs: Memory.Scale, @@ -99958,6 +101244,7 @@ const Select = struct { const sub_size: Adjust = .{ .sign = .neg, .lhs = .size, .op = .mul, .rhs = .@"1" }; const sub_src0_size_div_8: Adjust = .{ .sign = .neg, .lhs = .src0_size, .op = .div, .rhs = .@"8" }; const sub_src0_size: Adjust = .{ .sign = .neg, .lhs = .src0_size, .op = .mul, .rhs = .@"1" }; + const add_8_src0_size: Adjust = .{ .sign = .pos, .lhs = .src0_size, .op = .mul, .rhs = .@"8" }; const add_delta_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_size, .op = .div, .rhs = .@"8" }; const add_delta_elem_size: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .mul, .rhs = .@"1" }; const add_delta_elem_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .div, .rhs = .@"8" }; @@ -99972,6 +101259,7 @@ const Select = struct { const sub_bit_size: Adjust = .{ .sign = .neg, .lhs = .bit_size, .op = .mul, .rhs = .@"1" }; const add_src0_bit_size: Adjust = .{ .sign = .pos, .lhs = .src0_bit_size, .op = .mul, .rhs = .@"1" }; const sub_src0_bit_size: Adjust = .{ .sign = .neg, .lhs = .src0_bit_size, .op = .mul, .rhs = .@"1" }; + const add_bit_size_sub_8_size: Adjust = .{ .sign = .neg, .lhs = .@"8_size_sub_bit_size", .op = .mul, .rhs = .@"1" }; const add_8_len: Adjust = .{ .sign = .pos, .lhs = .len, .op = .mul, .rhs = .@"8" }; const add_4_len: Adjust = .{ .sign = .pos, .lhs = .len, .op = .mul, .rhs = .@"4" }; const add_3_len: Adjust = .{ .sign = .pos, .lhs = .len, .op = .mul, .rhs = .@"3" }; @@ -100000,6 +101288,7 @@ const Select = struct { const add_elem_limbs: Adjust = .{ .sign = .pos, .lhs = .elem_limbs, .op = .mul, .rhs = .@"1" }; const add_smin: Adjust = .{ .sign = .pos, .lhs = .smin, .op = .mul, .rhs = .@"1" }; const add_umax: Adjust = .{ .sign = .pos, .lhs = .umax, .op = .mul, .rhs = .@"1" }; + const repeat: Adjust = .{ .sign = .pos, .lhs = .repeat, .op = .mul, .rhs = .@"1" }; }; const Ref = enum(u5) { tmp0, @@ -100683,6 +101972,10 @@ const Select = struct { .unaligned_size => @intCast(s.cg.unalignedSize(op.flags.base.ref.typeOf(s))), .bit_size => @intCast(op.flags.base.ref.typeOf(s).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)), .src0_bit_size => @intCast(Select.Operand.Ref.src0.typeOf(s).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)), + .@"8_size_sub_bit_size" => { + const ty = op.flags.base.ref.typeOf(s); + break :lhs @intCast(8 * ty.abiSize(s.cg.pt.zcu) - ty.bitSize(s.cg.pt.zcu)); + }, .len => @intCast(op.flags.base.ref.typeOf(s).vectorLen(s.cg.pt.zcu)), .elem_limbs => @intCast(@divExact( op.flags.base.ref.typeOf(s).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), @@ -100704,6 +101997,10 @@ const Select = struct { .umax => @bitCast(@as(UnsignedImm, std.math.maxInt(UnsignedImm)) >> @truncate( -%op.flags.base.ref.typeOf(s).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu), )), + .repeat => switch (SignedImm) { + else => unreachable, + i64 => return @as(i64, op.imm) << 32 | @as(u32, @bitCast(op.imm)), + }, }; const rhs = op.flags.adjust.rhs.toLog2(); const op_res = op_res: switch (op.flags.adjust.op) { diff --git a/test/behavior/x86_64/math.zig b/test/behavior/x86_64/math.zig index ef7313d8395a..74d439491550 100644 --- a/test/behavior/x86_64/math.zig +++ b/test/behavior/x86_64/math.zig @@ -19273,6 +19273,22 @@ test clz { try test_clz.testIntVectors(); } +inline fn ctz(comptime Type: type, rhs: Type) @TypeOf(@ctz(rhs)) { + return @ctz(rhs); +} +test ctz { + const test_ctz = unary(ctz, .{}); + try test_ctz.testInts(); +} + +inline fn popCount(comptime Type: type, rhs: Type) @TypeOf(@popCount(rhs)) { + return @popCount(rhs); +} +test popCount { + const test_pop_count = unary(popCount, .{}); + try test_pop_count.testInts(); +} + inline fn byteSwap(comptime Type: type, rhs: Type) RoundBitsUp(Type, 8) { return @byteSwap(@as(RoundBitsUp(Type, 8), rhs)); }