Skip to content

Commit

Permalink
R600/SI: Handle MUBUF instructions in SIInstrInfo::moveToVALU()
Browse files Browse the repository at this point in the history
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204476 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
tstellarAMD committed Mar 21, 2014
1 parent 1f1c049 commit a1d28f6
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 5 deletions.
3 changes: 3 additions & 0 deletions lib/Target/R600/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,9 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
addPass(createR600VectorRegMerger(*TM));
} else {
addPass(createSIFixSGPRCopiesPass(*TM));
// SIFixSGPRCopies can generate a lot of duplicate instructions,
// so we need to run MachineCSE afterwards.
addPass(&MachineCSEID);
}
return false;
}
Expand Down
4 changes: 1 addition & 3 deletions lib/Target/R600/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Function.h"

const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;

using namespace llvm;

SITargetLowering::SITargetLowering(TargetMachine &TM) :
Expand Down Expand Up @@ -407,7 +405,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
.addImm(0);
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
.addImm(RSRC_DATA_FORMAT >> 32);
.addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
.addReg(SubRegHiLo)
.addImm(AMDGPU::sub0)
Expand Down
1 change: 1 addition & 0 deletions lib/Target/R600/SIInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let EXP_CNT = 1;

let neverHasSideEffects = 1;
let UseNamedOperandTable = 1;
}

class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
Expand Down
136 changes: 135 additions & 1 deletion lib/Target/R600/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,32 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
MO.ChangeToRegister(Reg, false);
}

/// Emit, immediately before \p MI, the COPY instructions needed to read the
/// sub-register \p SubIdx of \p SuperReg, and return the new virtual register
/// that receives the extracted value.
///
/// \param MI       Insertion point; its parent block and debug location are
///                 used for the new instructions.
/// \param MRI      Register info used to create the virtual registers.
/// \param SuperReg Operand to extract from; must be a register operand.
/// \param SuperRC  Register class for the intermediate full-width copy.
/// \param SubIdx   Sub-register index to extract.
/// \param SubRC    Register class of the returned register.
/// \returns The virtual register (of class \p SubRC) defined by the
///          sub-register copy.
unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC)
                                         const {
  assert(SuperReg.isReg());

  MachineBasicBlock &MBB = *MI->getParent();

  // Keep the creation order (full-width register first, then the result) so
  // virtual register numbering is unaffected.
  unsigned FullCopy = MRI.createVirtualRegister(SuperRC);
  unsigned Extracted = MRI.createVirtualRegister(SubRC);

  // SuperReg may itself be a sub-register use. Copying it into a fresh
  // register first means we never have to worry about composing its subreg
  // index with SubIdx. The register coalescer should be able to eliminate
  // this extra copy.
  BuildMI(MBB, MI, MI->getDebugLoc(), get(TargetOpcode::COPY), FullCopy)
      .addOperand(SuperReg);

  // Extracted = FullCopy:SubIdx
  BuildMI(MBB, MI, MI->getDebugLoc(), get(TargetOpcode::COPY), Extracted)
      .addReg(FullCopy, 0, SubIdx);

  return Extracted;
}

void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
Expand Down Expand Up @@ -675,6 +701,110 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
MI->getOperand(i).setReg(DstReg);
}
}

// Legalize MUBUF* instructions
// FIXME: If we start using the non-addr64 instructions for compute, we
// may need to legalize them here.

int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::srsrc);
int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::vaddr);
if (SRsrcIdx != -1 && VAddrIdx != -1) {
const TargetRegisterClass *VAddrRC =
RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);

if(VAddrRC->getSize() == 8 &&
MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
// We have a MUBUF instruction that uses a 64-bit vaddr register and
// srsrc has the incorrect register class. In order to fix this, we
// need to extract the pointer from the resource descriptor (srsrc),
// add it to the value of vaddr, then store the result in the vaddr
// operand. Then, we need to set the pointer field of the resource
// descriptor to zero.

MachineBasicBlock &MBB = *MI->getParent();
MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);

// SRsrcPtrLo = srsrc:sub0
SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
&AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);

// SRsrcPtrHi = srsrc:sub1
SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
&AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);

// VAddrLo = vaddr:sub0
VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
&AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);

// VAddrHi = vaddr:sub1
VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
&AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);

// NewVaddrLo = SRsrcPtrLo + VAddrLo
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
NewVAddrLo)
.addReg(SRsrcPtrLo)
.addReg(VAddrLo)
.addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);

// NewVaddrHi = SRsrcPtrHi + VAddrHi
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
NewVAddrHi)
.addReg(SRsrcPtrHi)
.addReg(VAddrHi)
.addReg(AMDGPU::VCC, RegState::ImplicitDefine)
.addReg(AMDGPU::VCC, RegState::Implicit);

// NewVaddr = {NewVaddrHi, NewVaddrLo}
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
NewVAddr)
.addReg(NewVAddrLo)
.addImm(AMDGPU::sub0)
.addReg(NewVAddrHi)
.addImm(AMDGPU::sub1);

// Zero64 = 0
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
Zero64)
.addImm(0);

// SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
SRsrcFormatLo)
.addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);

// SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
SRsrcFormatHi)
.addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);

// NewSRsrc = {Zero64, SRsrcFormat}
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
NewSRsrc)
.addReg(Zero64)
.addImm(AMDGPU::sub0_sub1)
.addReg(SRsrcFormatLo)
.addImm(AMDGPU::sub2)
.addReg(SRsrcFormatHi)
.addImm(AMDGPU::sub3);

// Update the instruction to use NewVaddr
MI->getOperand(VAddrIdx).setReg(NewVAddr);
// Update the instruction to use NewSRsrc
MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
}
}
}

void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
Expand Down Expand Up @@ -731,8 +861,12 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}

unsigned NewOpcode = getVALUOp(*Inst);
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
// We cannot move this instruction to the VALU, so we should try to
// legalize its operands instead.
legalizeOperands(Inst);
continue;
}

// Use the new VALU Opcode.
const MCInstrDesc &NewDesc = get(NewOpcode);
Expand Down
10 changes: 10 additions & 0 deletions lib/Target/R600/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ class SIInstrInfo : public AMDGPUInstrInfo {
private:
const SIRegisterInfo RI;

unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
MachineRegisterInfo &MRI,
MachineOperand &SuperReg,
const TargetRegisterClass *SuperRC,
unsigned SubIdx,
const TargetRegisterClass *SubRC) const;

public:
explicit SIInstrInfo(AMDGPUTargetMachine &tm);

Expand Down Expand Up @@ -142,6 +149,9 @@ namespace AMDGPU {
int getCommuteRev(uint16_t Opcode);
int getCommuteOrig(uint16_t Opcode);

const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;


} // End namespace AMDGPU

} // End namespace llvm
Expand Down
8 changes: 7 additions & 1 deletion test/CodeGen/R600/salu-to-valu.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s

; In this test both the pointer and the offset operands to the
; BUFFER_LOAD instructions end up being stored in vgprs. This
Expand All @@ -8,8 +8,14 @@
; (low 64-bits of srsrc).

; CHECK-LABEL: @mubuf

; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v

; Make sure we aren't using VGPRs for the srsrc operand of BUFFER_LOAD_*
; instructions
; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #1
Expand Down

0 comments on commit a1d28f6

Please sign in to comment.