Skip to content

Commit ff8c934

Browse files
Implement SVE2 ShiftLeftLogical Intrinsics (#116380)
* Implement SVE2 ShiftLeftLogicalSaturate, ShiftLeftLogicalSaturateUnsigned, ShiftLeftLogicalWideningEven, ShiftLeftLogicalWideningOdd
* Remove HW_Flag_BaseTypeFromFirstArg
* Fix emitter assembly display issue
* Formatting
1 parent e34a3c4 commit ff8c934

File tree

11 files changed

+426
-90
lines changed

11 files changed

+426
-90
lines changed

src/coreclr/jit/codegenarm64test.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7267,29 +7267,29 @@ void CodeGen::genArm64EmitterUnitTestsSve()
72677267

72687268
// IF_SVE_FR_2A
72697269
theEmitter->emitIns_R_R_I(INS_sve_sshllb, EA_SCALABLE, REG_V0, REG_V1, 1,
7270-
INS_OPTS_SCALABLE_B); // SSHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
7270+
INS_OPTS_SCALABLE_H); // SSHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
72717271
theEmitter->emitIns_R_R_I(INS_sve_sshllt, EA_SCALABLE, REG_V2, REG_V3, 3,
7272-
INS_OPTS_SCALABLE_B); // SSHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
7272+
INS_OPTS_SCALABLE_H); // SSHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
72737273
theEmitter->emitIns_R_R_I(INS_sve_ushllb, EA_SCALABLE, REG_V4, REG_V5, 5,
7274-
INS_OPTS_SCALABLE_B); // USHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
7274+
INS_OPTS_SCALABLE_H); // USHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
72757275
theEmitter->emitIns_R_R_I(INS_sve_ushllt, EA_SCALABLE, REG_V6, REG_V7, 7,
7276-
INS_OPTS_SCALABLE_B); // USHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
7276+
INS_OPTS_SCALABLE_H); // USHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
72777277
theEmitter->emitIns_R_R_I(INS_sve_sshllb, EA_SCALABLE, REG_V8, REG_V9, 0,
7278-
INS_OPTS_SCALABLE_H); // SSHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
7278+
INS_OPTS_SCALABLE_S); // SSHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
72797279
theEmitter->emitIns_R_R_I(INS_sve_sshllt, EA_SCALABLE, REG_V10, REG_V11, 5,
7280-
INS_OPTS_SCALABLE_H); // SSHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
7280+
INS_OPTS_SCALABLE_S); // SSHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
72817281
theEmitter->emitIns_R_R_I(INS_sve_ushllb, EA_SCALABLE, REG_V12, REG_V13, 10,
7282-
INS_OPTS_SCALABLE_H); // USHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
7282+
INS_OPTS_SCALABLE_S); // USHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
72837283
theEmitter->emitIns_R_R_I(INS_sve_ushllt, EA_SCALABLE, REG_V14, REG_V15, 15,
7284-
INS_OPTS_SCALABLE_H); // USHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
7284+
INS_OPTS_SCALABLE_S); // USHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
72857285
theEmitter->emitIns_R_R_I(INS_sve_sshllb, EA_SCALABLE, REG_V16, REG_V17, 8,
7286-
INS_OPTS_SCALABLE_S); // SSHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
7286+
INS_OPTS_SCALABLE_D); // SSHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
72877287
theEmitter->emitIns_R_R_I(INS_sve_sshllt, EA_SCALABLE, REG_V18, REG_V19, 16,
7288-
INS_OPTS_SCALABLE_S); // SSHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
7288+
INS_OPTS_SCALABLE_D); // SSHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
72897289
theEmitter->emitIns_R_R_I(INS_sve_ushllb, EA_SCALABLE, REG_V20, REG_V21, 24,
7290-
INS_OPTS_SCALABLE_S); // USHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
7290+
INS_OPTS_SCALABLE_D); // USHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
72917291
theEmitter->emitIns_R_R_I(INS_sve_ushllt, EA_SCALABLE, REG_V22, REG_V23, 31,
7292-
INS_OPTS_SCALABLE_S); // USHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
7292+
INS_OPTS_SCALABLE_D); // USHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
72937293

72947294
// IF_SVE_FV_2A
72957295
theEmitter->emitIns_R_R_I(INS_sve_cadd, EA_SCALABLE, REG_V0, REG_V1, 90,

src/coreclr/jit/emitarm64sve.cpp

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2569,22 +2569,22 @@ void emitter::emitInsSve_R_R_I(instruction ins,
25692569
case INS_sve_sshllt:
25702570
case INS_sve_ushllb:
25712571
case INS_sve_ushllt:
2572-
assert(insOptsScalableWide(opt));
2572+
assert(insOptsScalableAtLeastHalf(opt));
25732573
assert(isVectorRegister(reg1)); // ddddd
25742574
assert(isVectorRegister(reg2)); // nnnnn
25752575
assert(isValidVectorElemsize(optGetSveElemsize(opt))); // x xx
25762576

25772577
switch (opt)
25782578
{
2579-
case INS_OPTS_SCALABLE_B:
2579+
case INS_OPTS_SCALABLE_H:
25802580
assert(isValidUimm<3>(imm)); // iii
25812581
break;
25822582

2583-
case INS_OPTS_SCALABLE_H:
2583+
case INS_OPTS_SCALABLE_S:
25842584
assert(isValidUimm<4>(imm)); // x iii
25852585
break;
25862586

2587-
case INS_OPTS_SCALABLE_S:
2587+
case INS_OPTS_SCALABLE_D:
25882588
assert(isValidUimm<5>(imm)); // xx iii
25892589
break;
25902590

@@ -10766,9 +10766,8 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)
1076610766
code |= insEncodeReg_V<9, 5>(id->idReg2()); // nnnnn
1076710767
code |= insEncodeUimm<20, 16>(emitGetInsSC(id)); // iii
1076810768
// Bit 23 should not be set by below call
10769-
assert(insOptsScalableWide(id->idInsOpt()));
10770-
code |= insEncodeSveElemsize_tszh_23_tszl_20_to_19(optGetSveElemsize(id->idInsOpt())); // xx
10771-
// x
10769+
assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
10770+
code |= insEncodeSplitUimm<22, 22, 20, 19>(optGetSveElemsize(id->idInsOpt()) / 2);
1077210771
dst += emitOutput_Instr(dst, code);
1077310772
break;
1077410773

@@ -13474,23 +13473,23 @@ void emitter::emitInsSveSanityCheck(instrDesc* id)
1347413473

1347513474
case IF_SVE_FR_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift left long
1347613475
{
13477-
assert(insOptsScalableWide(id->idInsOpt()));
13476+
assert(insOptsScalableAtLeastHalf(id->idInsOpt()));
1347813477
assert(isVectorRegister(id->idReg1())); // ddddd
1347913478
assert(isVectorRegister(id->idReg2())); // nnnnn
1348013479
assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // x xx
1348113480
imm = emitGetInsSC(id);
1348213481

1348313482
switch (id->idInsOpt())
1348413483
{
13485-
case INS_OPTS_SCALABLE_B:
13484+
case INS_OPTS_SCALABLE_H:
1348613485
assert(isValidUimm<3>(imm)); // iii
1348713486
break;
1348813487

13489-
case INS_OPTS_SCALABLE_H:
13488+
case INS_OPTS_SCALABLE_S:
1349013489
assert(isValidUimm<4>(imm)); // x iii
1349113490
break;
1349213491

13493-
case INS_OPTS_SCALABLE_S:
13492+
case INS_OPTS_SCALABLE_D:
1349413493
assert(isValidUimm<5>(imm)); // xx iii
1349513494
break;
1349613495

@@ -15351,10 +15350,10 @@ void emitter::emitDispInsSveHelp(instrDesc* id)
1535115350
// <Zd>.<T>, <Zn>.<Tb>, #<const>
1535215351
case IF_SVE_FR_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift left long
1535315352
{
15354-
const insOpts largeSizeSpecifier = (insOpts)(id->idInsOpt() + 1);
15355-
emitDispSveReg(id->idReg1(), largeSizeSpecifier, true); // ddddd
15356-
emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn
15357-
emitDispImm(emitGetInsSC(id), false); // iii
15353+
const insOpts narrowSizeSpecifier = (insOpts)(id->idInsOpt() - 1);
15354+
emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
15355+
emitDispSveReg(id->idReg2(), narrowSizeSpecifier, true); // nnnnn
15356+
emitDispImm(emitGetInsSC(id), false); // iii
1535815357
break;
1535915358
}
1536015359

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,8 +342,17 @@ void HWIntrinsicInfo::lookupImmBounds(
342342

343343
if (category == HW_Category_ShiftLeftByImmediate)
344344
{
345+
int size = genTypeSize(baseType);
346+
347+
if (intrinsic == NI_Sve2_ShiftLeftLogicalWideningEven || intrinsic == NI_Sve2_ShiftLeftLogicalWideningOdd)
348+
{
349+
// Edge case for widening shifts. The base type is the wide type, but the maximum shift is the number
350+
// of bits in the narrow type.
351+
size /= 2;
352+
}
353+
345354
// The left shift amount is in the range 0 to the element width in bits minus 1.
346-
immUpperBound = BITS_PER_BYTE * genTypeSize(baseType) - 1;
355+
immUpperBound = BITS_PER_BYTE * size - 1;
347356
}
348357
else if (category == HW_Category_ShiftRightByImmediate)
349358
{

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -719,8 +719,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
719719
break;
720720
}
721721

722-
insScalableOpts sopt = INS_SCALABLE_OPTS_NONE;
723-
bool hasShift = false;
722+
insScalableOpts sopt = INS_SCALABLE_OPTS_NONE;
723+
bool hasImmShift = (intrinEmbMask.category == HW_Category_ShiftLeftByImmediate ||
724+
intrinEmbMask.category == HW_Category_ShiftRightByImmediate) &&
725+
HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id);
724726

725727
insOpts embOpt = opt;
726728
switch (intrinEmbMask.id)
@@ -738,10 +740,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
738740
break;
739741
}
740742

741-
case NI_Sve_ShiftRightArithmeticForDivide:
742-
hasShift = true;
743-
break;
744-
745743
case NI_Sve_CreateBreakPropagateMask:
746744
embOpt = INS_OPTS_SCALABLE_B;
747745
break;
@@ -758,7 +756,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
758756
}
759757

760758
auto emitInsHelper = [&](regNumber reg1, regNumber reg2, regNumber reg3) {
761-
if (hasShift)
759+
if (hasImmShift)
762760
{
763761
HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic());
764762
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
@@ -774,7 +772,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
774772
};
775773

776774
auto emitInsMovPrfxHelper = [&](regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4) {
777-
if (hasShift)
775+
if (hasImmShift)
778776
{
779777
HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic(), 2);
780778
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
@@ -818,7 +816,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
818816
// Finally, perform the actual "predicated" operation so that `targetReg` is the first
819817
// operand and `embMaskOp2Reg` is the second operand.
820818

821-
if (hasShift)
819+
if (hasImmShift)
822820
{
823821
HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic(), 2);
824822
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,10 @@ HARDWARE_INTRINSIC(Sve2, ShiftArithmeticRounded,
328328
HARDWARE_INTRINSIC(Sve2, ShiftArithmeticRoundedSaturate, -1, -1, {INS_sve_sqrshl, INS_invalid, INS_sve_sqrshl, INS_invalid, INS_sve_sqrshl, INS_invalid, INS_sve_sqrshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
329329
HARDWARE_INTRINSIC(Sve2, ShiftArithmeticSaturate, -1, -1, {INS_sve_sqshl, INS_invalid, INS_sve_sqshl, INS_invalid, INS_sve_sqshl, INS_invalid, INS_sve_sqshl, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
330330
HARDWARE_INTRINSIC(Sve2, ShiftLeftAndInsert, -1, 3, {INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics)
331+
HARDWARE_INTRINSIC(Sve2, ShiftLeftLogicalSaturate, -1, -1, {INS_invalid, INS_sve_uqshl, INS_invalid, INS_sve_uqshl, INS_invalid, INS_sve_uqshl, INS_invalid, INS_sve_uqshl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics)
332+
HARDWARE_INTRINSIC(Sve2, ShiftLeftLogicalSaturateUnsigned, -1, -1, {INS_invalid, INS_sve_sqshlu, INS_invalid, INS_sve_sqshlu, INS_invalid, INS_sve_sqshlu, INS_invalid, INS_sve_sqshlu, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics)
333+
HARDWARE_INTRINSIC(Sve2, ShiftLeftLogicalWideningEven, -1, 2, {INS_invalid, INS_invalid, INS_sve_sshllb, INS_sve_ushllb, INS_sve_sshllb, INS_sve_ushllb, INS_sve_sshllb, INS_sve_ushllb, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand)
334+
HARDWARE_INTRINSIC(Sve2, ShiftLeftLogicalWideningOdd, -1, 2, {INS_invalid, INS_invalid, INS_sve_sshllt, INS_sve_ushllt, INS_sve_sshllt, INS_sve_ushllt, INS_sve_sshllt, INS_sve_ushllt, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand)
331335
HARDWARE_INTRINSIC(Sve2, Xor, -1, 3, {INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_sve_eor3, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
332336
HARDWARE_INTRINSIC(Sve2, XorRotateRight, -1, 3, {INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_sve_xar, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand)
333337
#define LAST_NI_Sve2 NI_Sve2_XorRotateRight

0 commit comments

Comments
 (0)