-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Clang][AMDGPU] Add __builtin_amdgcn_cvt_off_f32_i4 #133741
Conversation
This builtin maps to V_CVT_OFF_F32_I4 which treats its input as a 4-bit signed integer and returns 0.0625f * src . SWDEV-518861
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-clang Author: Juan Manuel Martinez Caamaño (jmmartinez) ChangesThis built-in that maps to SWDEV-518861 Full diff: https://github.com/llvm/llvm-project/pull/133741.diff 10 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 44ef404aee72f..f38148cc795dc 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -140,6 +140,7 @@ BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc")
BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
+BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fUi", "nc")
BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-cvt-off-f32-i4.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-cvt-off-f32-i4.cl
new file mode 100644
index 0000000000000..6dc235f9cc6c7
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-cvt-off-f32-i4.cl
@@ -0,0 +1,15 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \
+// RUN: -emit-llvm -o - | FileCheck %s
+
+// CHECK-LABEL: @test_builtin_amdgcn_cvt_off_f32_i4(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: store i32 [[N:%.*]], ptr addrspace(5) [[N_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.cvt.off.f32.i4(i32 [[TMP0]])
+// CHECK-NEXT: ret float [[TMP1]]
+//
+float test_builtin_amdgcn_cvt_off_f32_i4(unsigned n) {
+ return __builtin_amdgcn_cvt_off_f32_i4(n);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-cvt-off-f32-i4-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-cvt-off-f32-i4-err.cl
new file mode 100644
index 0000000000000..f5b02b80c37da
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-cvt-off-f32-i4-err.cl
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -triple amdgcn-- -verify -S -o - %s
+
+void test_builtin_amdgcn_cvt_off_f32_i4(unsigned n) {
+ struct A{ unsigned x; } a;
+ __builtin_amdgcn_cvt_off_f32_i4(n, n); // expected-error {{too many arguments to function call, expected 1, have 2}}
+ __builtin_amdgcn_cvt_off_f32_i4(); // expected-error {{too few arguments to function call, expected 1, have 0}}
+ __builtin_amdgcn_cvt_off_f32_i4(a); // expected-error {{passing '__private struct A' to parameter of incompatible type 'unsigned int'}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index f53016f62abbe..ebac0f9029791 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3375,6 +3375,12 @@ def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">,
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
+// llvm.amdgcn.cvt.off.fp32.i4 int srcA
+def int_amdgcn_cvt_off_f32_i4: ClangBuiltin<"__builtin_amdgcn_cvt_off_f32_i4">,
+ DefaultAttrsIntrinsic<[llvm_float_ty],
+ [llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
//===----------------------------------------------------------------------===//
// gfx950 intrinsics
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3246e575ea6a9..533ad349f7500 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -6042,6 +6042,7 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
// TODO: Handle more intrinsics
switch (IntrinsicID) {
case Intrinsic::amdgcn_cubeid:
+ case Intrinsic::amdgcn_cvt_off_f32_i4:
return true;
case Intrinsic::amdgcn_frexp_mant: {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 7cd97e95b0189..535fda7393bc1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -729,6 +729,15 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
+ case Intrinsic::amdgcn_cvt_off_f32_i4: {
+ ConstantInt *CArg = dyn_cast<ConstantInt>(II.getArgOperand(0));
+ if (!CArg)
+ break;
+ int CI4BitAsInt = CArg->getValue().trunc(4).getSExtValue();
+ float ResVal = 0.0625 * CI4BitAsInt;
+ Constant *Res = ConstantFP::get(II.getType(), ResVal);
+ return IC.replaceInstUsesWith(II, Res);
+ }
case Intrinsic::amdgcn_ubfe:
case Intrinsic::amdgcn_sbfe: {
// Decompose simple cases into standard shifts.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 7df1e634b21ba..1d0e81db5a5db 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4585,6 +4585,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_dot4_f32_bf8_bf8:
case Intrinsic::amdgcn_cvt_f32_fp8:
case Intrinsic::amdgcn_cvt_f32_bf8:
+ case Intrinsic::amdgcn_cvt_off_f32_i4:
case Intrinsic::amdgcn_cvt_pk_f32_fp8:
case Intrinsic::amdgcn_cvt_pk_f32_bf8:
case Intrinsic::amdgcn_cvt_pk_fp8_f32:
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index def06c1e9a0d7..1dae2e432eb8c 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1578,6 +1578,11 @@ class MovDPP8Pattern<Predicate Pred, Instruction Inst, ValueType vt> : GCNPat <
let OtherPredicates = [Pred];
}
+def : GCNPat <
+ (f32 (int_amdgcn_cvt_off_f32_i4 i32:$src)),
+ (V_CVT_OFF_F32_I4_e32 VGPR_32:$src)
+>;
+
foreach vt = Reg32Types.types in {
def : MovDPP8Pattern<isGFX10Only, V_MOV_B32_dpp8_gfx10, vt>;
def : MovDPP8Pattern<isGFX11Only, V_MOV_B32_dpp8_gfx11, vt>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.off.f32.i4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.off.f32.i4.ll
new file mode 100644
index 0000000000000..a25d9c30a2331
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.off.f32.i4.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs %s --global-isel -o - | FileCheck %s
+
+declare float @llvm.amdgcn.cvt.off.f32.i4(i32)
+
+define amdgpu_cs float @cvt_var(i32 %a) {
+; CHECK-LABEL: cvt_var:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_cvt_off_f32_i4_e32 v0, v0
+; CHECK-NEXT: ; return to shader part epilog
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 %a)
+ ret float %ret
+}
+
+define amdgpu_cs float @cvt_imm() {
+; CHECK-LABEL: cvt_imm:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_cvt_off_f32_i4_e32 v0, 4
+; CHECK-NEXT: ; return to shader part epilog
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 4)
+ ret float %ret
+}
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-amdgcn.cvt.off.f32.i4.ll b/llvm/test/CodeGen/AMDGPU/simplify-amdgcn.cvt.off.f32.i4.ll
new file mode 100644
index 0000000000000..bac02bd61d0a9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/simplify-amdgcn.cvt.off.f32.i4.ll
@@ -0,0 +1,158 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine < %s | FileCheck %s
+
+declare float @llvm.amdgcn.cvt.off.f32.i4(i32)
+
+define float @cvt_var(i32 %a) {
+; CHECK-LABEL: define float @cvt_var(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[RET:%.*]] = call float @llvm.amdgcn.cvt.off.f32.i4(i32 [[A]])
+; CHECK-NEXT: ret float [[RET]]
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 %a)
+ ret float %ret
+}
+
+define float @cvt_imm_0() {
+; CHECK-LABEL: define float @cvt_imm_0() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 0)
+ ret float %ret
+}
+
+define float @cvt_imm_1() {
+; CHECK-LABEL: define float @cvt_imm_1() {
+; CHECK-NEXT: ret float 6.250000e-02
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 1)
+ ret float %ret
+}
+
+define float @cvt_imm_2() {
+; CHECK-LABEL: define float @cvt_imm_2() {
+; CHECK-NEXT: ret float 1.250000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 2)
+ ret float %ret
+}
+
+define float @cvt_imm_3() {
+; CHECK-LABEL: define float @cvt_imm_3() {
+; CHECK-NEXT: ret float 1.875000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 3)
+ ret float %ret
+}
+
+define float @cvt_imm_4() {
+; CHECK-LABEL: define float @cvt_imm_4() {
+; CHECK-NEXT: ret float 2.500000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 4)
+ ret float %ret
+}
+
+define float @cvt_imm_5() {
+; CHECK-LABEL: define float @cvt_imm_5() {
+; CHECK-NEXT: ret float 3.125000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 5)
+ ret float %ret
+}
+
+define float @cvt_imm_6() {
+; CHECK-LABEL: define float @cvt_imm_6() {
+; CHECK-NEXT: ret float 3.750000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 6)
+ ret float %ret
+}
+
+define float @cvt_imm_7() {
+; CHECK-LABEL: define float @cvt_imm_7() {
+; CHECK-NEXT: ret float 4.375000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 7)
+ ret float %ret
+}
+
+define float @cvt_imm_8() {
+; CHECK-LABEL: define float @cvt_imm_8() {
+; CHECK-NEXT: ret float -5.000000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 8)
+ ret float %ret
+}
+
+define float @cvt_imm_9() {
+; CHECK-LABEL: define float @cvt_imm_9() {
+; CHECK-NEXT: ret float -4.375000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 9)
+ ret float %ret
+}
+
+define float @cvt_imm_10() {
+; CHECK-LABEL: define float @cvt_imm_10() {
+; CHECK-NEXT: ret float -3.750000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 10)
+ ret float %ret
+}
+
+define float @cvt_imm_11() {
+; CHECK-LABEL: define float @cvt_imm_11() {
+; CHECK-NEXT: ret float -3.125000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 11)
+ ret float %ret
+}
+
+define float @cvt_imm_12() {
+; CHECK-LABEL: define float @cvt_imm_12() {
+; CHECK-NEXT: ret float -2.500000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 12)
+ ret float %ret
+}
+
+define float @cvt_imm_13() {
+; CHECK-LABEL: define float @cvt_imm_13() {
+; CHECK-NEXT: ret float -1.875000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 13)
+ ret float %ret
+}
+
+define float @cvt_imm_14() {
+; CHECK-LABEL: define float @cvt_imm_14() {
+; CHECK-NEXT: ret float -1.250000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 14)
+ ret float %ret
+}
+
+define float @cvt_imm_15() {
+; CHECK-LABEL: define float @cvt_imm_15() {
+; CHECK-NEXT: ret float -6.250000e-02
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 15)
+ ret float %ret
+}
+
+define float @cvt_imm_underflow() {
+; CHECK-LABEL: define float @cvt_imm_underflow() {
+; CHECK-NEXT: ret float -6.250000e-02
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 -1)
+ ret float %ret
+}
+
+define float @cvt_imm_overflow() {
+; CHECK-LABEL: define float @cvt_imm_overflow() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 16)
+ ret float %ret
+}
|
@llvm/pr-subscribers-llvm-ir Author: Juan Manuel Martinez Caamaño (jmmartinez) ChangesThis built-in that maps to SWDEV-518861 Full diff: https://github.com/llvm/llvm-project/pull/133741.diff 10 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 44ef404aee72f..f38148cc795dc 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -140,6 +140,7 @@ BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc")
BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
+BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fUi", "nc")
BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-cvt-off-f32-i4.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-cvt-off-f32-i4.cl
new file mode 100644
index 0000000000000..6dc235f9cc6c7
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-cvt-off-f32-i4.cl
@@ -0,0 +1,15 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \
+// RUN: -emit-llvm -o - | FileCheck %s
+
+// CHECK-LABEL: @test_builtin_amdgcn_cvt_off_f32_i4(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: store i32 [[N:%.*]], ptr addrspace(5) [[N_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.cvt.off.f32.i4(i32 [[TMP0]])
+// CHECK-NEXT: ret float [[TMP1]]
+//
+float test_builtin_amdgcn_cvt_off_f32_i4(unsigned n) {
+ return __builtin_amdgcn_cvt_off_f32_i4(n);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-cvt-off-f32-i4-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-cvt-off-f32-i4-err.cl
new file mode 100644
index 0000000000000..f5b02b80c37da
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-cvt-off-f32-i4-err.cl
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -triple amdgcn-- -verify -S -o - %s
+
+void test_builtin_amdgcn_cvt_off_f32_i4(unsigned n) {
+ struct A{ unsigned x; } a;
+ __builtin_amdgcn_cvt_off_f32_i4(n, n); // expected-error {{too many arguments to function call, expected 1, have 2}}
+ __builtin_amdgcn_cvt_off_f32_i4(); // expected-error {{too few arguments to function call, expected 1, have 0}}
+ __builtin_amdgcn_cvt_off_f32_i4(a); // expected-error {{passing '__private struct A' to parameter of incompatible type 'unsigned int'}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index f53016f62abbe..ebac0f9029791 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3375,6 +3375,12 @@ def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">,
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
+// llvm.amdgcn.cvt.off.fp32.i4 int srcA
+def int_amdgcn_cvt_off_f32_i4: ClangBuiltin<"__builtin_amdgcn_cvt_off_f32_i4">,
+ DefaultAttrsIntrinsic<[llvm_float_ty],
+ [llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
//===----------------------------------------------------------------------===//
// gfx950 intrinsics
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3246e575ea6a9..533ad349f7500 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -6042,6 +6042,7 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
// TODO: Handle more intrinsics
switch (IntrinsicID) {
case Intrinsic::amdgcn_cubeid:
+ case Intrinsic::amdgcn_cvt_off_f32_i4:
return true;
case Intrinsic::amdgcn_frexp_mant: {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 7cd97e95b0189..535fda7393bc1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -729,6 +729,15 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
+ case Intrinsic::amdgcn_cvt_off_f32_i4: {
+ ConstantInt *CArg = dyn_cast<ConstantInt>(II.getArgOperand(0));
+ if (!CArg)
+ break;
+ int CI4BitAsInt = CArg->getValue().trunc(4).getSExtValue();
+ float ResVal = 0.0625 * CI4BitAsInt;
+ Constant *Res = ConstantFP::get(II.getType(), ResVal);
+ return IC.replaceInstUsesWith(II, Res);
+ }
case Intrinsic::amdgcn_ubfe:
case Intrinsic::amdgcn_sbfe: {
// Decompose simple cases into standard shifts.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 7df1e634b21ba..1d0e81db5a5db 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4585,6 +4585,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_dot4_f32_bf8_bf8:
case Intrinsic::amdgcn_cvt_f32_fp8:
case Intrinsic::amdgcn_cvt_f32_bf8:
+ case Intrinsic::amdgcn_cvt_off_f32_i4:
case Intrinsic::amdgcn_cvt_pk_f32_fp8:
case Intrinsic::amdgcn_cvt_pk_f32_bf8:
case Intrinsic::amdgcn_cvt_pk_fp8_f32:
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index def06c1e9a0d7..1dae2e432eb8c 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1578,6 +1578,11 @@ class MovDPP8Pattern<Predicate Pred, Instruction Inst, ValueType vt> : GCNPat <
let OtherPredicates = [Pred];
}
+def : GCNPat <
+ (f32 (int_amdgcn_cvt_off_f32_i4 i32:$src)),
+ (V_CVT_OFF_F32_I4_e32 VGPR_32:$src)
+>;
+
foreach vt = Reg32Types.types in {
def : MovDPP8Pattern<isGFX10Only, V_MOV_B32_dpp8_gfx10, vt>;
def : MovDPP8Pattern<isGFX11Only, V_MOV_B32_dpp8_gfx11, vt>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.off.f32.i4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.off.f32.i4.ll
new file mode 100644
index 0000000000000..a25d9c30a2331
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.off.f32.i4.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs %s --global-isel -o - | FileCheck %s
+
+declare float @llvm.amdgcn.cvt.off.f32.i4(i32)
+
+define amdgpu_cs float @cvt_var(i32 %a) {
+; CHECK-LABEL: cvt_var:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_cvt_off_f32_i4_e32 v0, v0
+; CHECK-NEXT: ; return to shader part epilog
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 %a)
+ ret float %ret
+}
+
+define amdgpu_cs float @cvt_imm() {
+; CHECK-LABEL: cvt_imm:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_cvt_off_f32_i4_e32 v0, 4
+; CHECK-NEXT: ; return to shader part epilog
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 4)
+ ret float %ret
+}
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-amdgcn.cvt.off.f32.i4.ll b/llvm/test/CodeGen/AMDGPU/simplify-amdgcn.cvt.off.f32.i4.ll
new file mode 100644
index 0000000000000..bac02bd61d0a9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/simplify-amdgcn.cvt.off.f32.i4.ll
@@ -0,0 +1,158 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine < %s | FileCheck %s
+
+declare float @llvm.amdgcn.cvt.off.f32.i4(i32)
+
+define float @cvt_var(i32 %a) {
+; CHECK-LABEL: define float @cvt_var(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT: [[RET:%.*]] = call float @llvm.amdgcn.cvt.off.f32.i4(i32 [[A]])
+; CHECK-NEXT: ret float [[RET]]
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 %a)
+ ret float %ret
+}
+
+define float @cvt_imm_0() {
+; CHECK-LABEL: define float @cvt_imm_0() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 0)
+ ret float %ret
+}
+
+define float @cvt_imm_1() {
+; CHECK-LABEL: define float @cvt_imm_1() {
+; CHECK-NEXT: ret float 6.250000e-02
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 1)
+ ret float %ret
+}
+
+define float @cvt_imm_2() {
+; CHECK-LABEL: define float @cvt_imm_2() {
+; CHECK-NEXT: ret float 1.250000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 2)
+ ret float %ret
+}
+
+define float @cvt_imm_3() {
+; CHECK-LABEL: define float @cvt_imm_3() {
+; CHECK-NEXT: ret float 1.875000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 3)
+ ret float %ret
+}
+
+define float @cvt_imm_4() {
+; CHECK-LABEL: define float @cvt_imm_4() {
+; CHECK-NEXT: ret float 2.500000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 4)
+ ret float %ret
+}
+
+define float @cvt_imm_5() {
+; CHECK-LABEL: define float @cvt_imm_5() {
+; CHECK-NEXT: ret float 3.125000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 5)
+ ret float %ret
+}
+
+define float @cvt_imm_6() {
+; CHECK-LABEL: define float @cvt_imm_6() {
+; CHECK-NEXT: ret float 3.750000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 6)
+ ret float %ret
+}
+
+define float @cvt_imm_7() {
+; CHECK-LABEL: define float @cvt_imm_7() {
+; CHECK-NEXT: ret float 4.375000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 7)
+ ret float %ret
+}
+
+define float @cvt_imm_8() {
+; CHECK-LABEL: define float @cvt_imm_8() {
+; CHECK-NEXT: ret float -5.000000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 8)
+ ret float %ret
+}
+
+define float @cvt_imm_9() {
+; CHECK-LABEL: define float @cvt_imm_9() {
+; CHECK-NEXT: ret float -4.375000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 9)
+ ret float %ret
+}
+
+define float @cvt_imm_10() {
+; CHECK-LABEL: define float @cvt_imm_10() {
+; CHECK-NEXT: ret float -3.750000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 10)
+ ret float %ret
+}
+
+define float @cvt_imm_11() {
+; CHECK-LABEL: define float @cvt_imm_11() {
+; CHECK-NEXT: ret float -3.125000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 11)
+ ret float %ret
+}
+
+define float @cvt_imm_12() {
+; CHECK-LABEL: define float @cvt_imm_12() {
+; CHECK-NEXT: ret float -2.500000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 12)
+ ret float %ret
+}
+
+define float @cvt_imm_13() {
+; CHECK-LABEL: define float @cvt_imm_13() {
+; CHECK-NEXT: ret float -1.875000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 13)
+ ret float %ret
+}
+
+define float @cvt_imm_14() {
+; CHECK-LABEL: define float @cvt_imm_14() {
+; CHECK-NEXT: ret float -1.250000e-01
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 14)
+ ret float %ret
+}
+
+define float @cvt_imm_15() {
+; CHECK-LABEL: define float @cvt_imm_15() {
+; CHECK-NEXT: ret float -6.250000e-02
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 15)
+ ret float %ret
+}
+
+define float @cvt_imm_underflow() {
+; CHECK-LABEL: define float @cvt_imm_underflow() {
+; CHECK-NEXT: ret float -6.250000e-02
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 -1)
+ ret float %ret
+}
+
+define float @cvt_imm_overflow() {
+; CHECK-LABEL: define float @cvt_imm_overflow() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 16)
+ ret float %ret
+}
|
Why this lowering? We have a table right? |
You can test this locally with the following command:git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp View the diff from clang-format here.diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 00967bee0..db07f0db5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -730,13 +730,13 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
case Intrinsic::amdgcn_cvt_off_f32_i4: {
- Value* Arg = II.getArgOperand(0);
+ Value *Arg = II.getArgOperand(0);
Type *Ty = II.getType();
if (isa<PoisonValue>(Arg))
return IC.replaceInstUsesWith(II, PoisonValue::get(Ty));
- if(IC.getSimplifyQuery().isUndefValue(Arg))
+ if (IC.getSimplifyQuery().isUndefValue(Arg))
return IC.replaceInstUsesWith(II, Constant::getNullValue(Ty));
ConstantInt *CArg = dyn_cast<ConstantInt>(II.getArgOperand(0));
|
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.off.f32.i4.ll llvm/test/Transforms/InstCombine/AMDGPU/simplify-amdgcn.cvt.off.f32.i4.ll llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
… depend on the host
This built-in that maps to
V_CVT_OFF_F32_I4
which treats its input as a 4-bit signed integer and returns0.0625f * src
.SWDEV-518861