Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV] Add software pipeliner support #117546

Merged
merged 1 commit into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4248,3 +4248,84 @@ bool RISCV::isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
return false;
return LHS.getImm() <= RHS.getImm();
}

namespace {
class RISCVPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
const MachineInstr *LHS;
const MachineInstr *RHS;
SmallVector<MachineOperand, 3> Cond;

public:
/// Stores the instructions that define the two branch operands together with
/// a private copy of the branch condition. Either LHS or RHS may be null; the
/// class null-checks both before comparing against them.
RISCVPipelinerLoopInfo(const MachineInstr *LHS, const MachineInstr *RHS,
const SmallVectorImpl<MachineOperand> &Cond)
: LHS(LHS), RHS(RHS), Cond(Cond.begin(), Cond.end()) {}

bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
// Make the instructions for loop control be placed in stage 0.
// The predecessors of LHS/RHS are considered by the caller.
if (LHS && MI == LHS)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we want to ignore LHS and RHS? AArch64 only ignores the compare instruction, but on RISC-V the branch is also the compare instruction. So LHS and RHS are inputs to the compare which AArch64 doesn't ignore.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is the same: we just need a root SUnit, and SMSchedule::computeUnpipelineableNodes will add all of its predecessors.

/// Collect every SUnit that must not be pipelined: the instructions the
/// target asks to ignore, plus everything reachable from them through
/// predecessor edges (and, for PHIs, through anti-dependence successors).
SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes(
    SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
  // Seed the work stack with the instructions the loop info marks as ignored.
  SmallVector<SUnit *, 8> Pending;
  for (SUnit &Node : SSD->SUnits) {
    if (!Node.isInstr())
      continue;
    if (PLI->shouldIgnoreForPipelining(Node.getInstr()))
      Pending.push_back(&Node);
  }

  SmallSet<SUnit *, 8> Excluded;
  while (!Pending.empty()) {
    SUnit *Cur = Pending.pop_back_val();
    // insert() reports whether the node is new; already-visited nodes are
    // skipped so each node is expanded at most once.
    if (!Excluded.insert(Cur).second)
      continue;
    LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << Cur->NodeNum << ")\n");

    // Everything this node depends on is excluded as well.
    for (const SDep &Pred : Cur->Preds)
      Pending.push_back(Pred.getSUnit());

    // Only PHIs additionally pull in their anti-dependence successors.
    if (!Cur->getInstr()->isPHI())
      continue;
    for (const SDep &Succ : Cur->Succs)
      if (Succ.getKind() == SDep::Anti)
        Pending.push_back(Succ.getSUnit());
  }
  return Excluded;
}

So for AArch64, the LHS/RHS of the compare instruction are ignored as well; they are just not explicitly ignored in shouldIgnoreForPipelining.

return true;
if (RHS && MI == RHS)
return true;
return false;
}

/// Hands the stored branch condition back to the caller through \p CondParam
/// and returns an empty optional. \p TC and \p MBB are not used.
std::optional<bool> createTripCountGreaterCondition(
    int TC, MachineBasicBlock &MBB,
    SmallVectorImpl<MachineOperand> &CondParam) override {
  // The caller emits a branch of the form "if (Cond) goto epilogue"; the
  // stored condition has already been normalized for that use, and the
  // instructions feeding the branch are assumed to be in place already.
  CondParam.assign(Cond.begin(), Cond.end());
  return std::nullopt;
}

// Intentionally a no-op: this class stores only LHS, RHS and Cond, and does
// nothing with NewPreheader.
void setPreheader(MachineBasicBlock *NewPreheader) override {}

// Intentionally a no-op: TripCountAdjust is ignored by this implementation.
void adjustTripCount(int TripCountAdjust) override {}

// Intentionally a no-op: this implementation performs no work in disposed().
void disposed() override {}
};
} // namespace

/// Inspect the terminating branch of \p LoopBB and build the loop description
/// used by the machine pipeliner.
///
/// Returns nullptr when analyzeBranch() returns true, when both branch targets
/// are \p LoopBB, when there is no false block (FBB), or when either branch
/// operand is defined by a PHI. Otherwise returns a RISCVPipelinerLoopInfo
/// holding the (possibly reversed) condition and the instructions that define
/// its two operands (each may be null).
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
RISCVInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
wangpc-pp marked this conversation as resolved.
Show resolved Hide resolved
if (analyzeBranch(*LoopBB, TBB, FBB, Cond, /*AllowModify=*/false))
return nullptr;

// Infinite loops are not supported
if (TBB == LoopBB && FBB == LoopBB)
return nullptr;

// Must be conditional branch
if (FBB == nullptr)
return nullptr;

assert((TBB == LoopBB || FBB == LoopBB) &&
"The Loop must be a single-basic-block loop");

// Normalization for createTripCountGreaterCondition()
// When the taken target is the loop itself, flip the condition so that it
// matches the "if (Cond) goto epilogue" form that
// createTripCountGreaterCondition() hands back to its caller.
if (TBB == LoopBB)
reverseBranchCondition(Cond);

const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
// Maps a branch operand to the instruction that defines it. Yields nullptr
// for operands that are not registers and for registers that are not virtual.
auto FindRegDef = [&MRI](MachineOperand &Op) -> const MachineInstr * {
if (!Op.isReg())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When would isReg() be false?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One operand may be imm for BEQIMM/BNEIMM. And we may have standard branches with immediate soon.

return nullptr;
Register Reg = Op.getReg();
if (!Reg.isVirtual())
return nullptr;
return MRI.getVRegDef(Reg);
};

// Cond[1] and Cond[2] are used here as the two compared operands of the
// branch (NOTE(review): Cond[0] presumably carries the branch opcode --
// confirm against analyzeBranch).
const MachineInstr *LHS = FindRegDef(Cond[1]);
const MachineInstr *RHS = FindRegDef(Cond[2]);
// Give up when either operand is defined by a PHI.
if (LHS && LHS->isPHI())
return nullptr;
if (RHS && RHS->isPHI())
return nullptr;
wangpc-pp marked this conversation as resolved.
Show resolved Hide resolved

return std::make_unique<RISCVPipelinerLoopInfo>(LHS, RHS, Cond);
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,9 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {

unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;

std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;

protected:
const RISCVSubtarget &STI;

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ bool RISCVSubtarget::useRVVForFixedLengthVectors() const {

/// Always returns true, i.e. this hook is enabled unconditionally here.
bool RISCVSubtarget::enableSubRegLiveness() const { return true; }

bool RISCVSubtarget::enableMachinePipeliner() const {
return getSchedModel().hasInstrSchedModel();
}

/// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
/// Returns the value of `UseAA`, which is defined outside this excerpt.
bool RISCVSubtarget::useAA() const { return UseAA; }
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {

bool enableSubRegLiveness() const override;

bool enableMachinePipeliner() const override;

/// Always returns false.
/// NOTE(review): presumably this opts out of a DFA-based resource model for
/// the software pipeliner -- confirm against the base-class documentation.
bool useDFAforSMS() const override { return false; }

bool useAA() const override;

unsigned getCacheLineSize() const override {
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ static cl::opt<bool> DisableVectorMaskMutation(
cl::desc("Disable the vector mask scheduling mutation"), cl::init(false),
cl::Hidden);

// Hidden command-line flag `-riscv-enable-pipeliner`; off by default.
// RISCVPassConfig::addPreRegAlloc() consults it, together with the
// optimization level, before adding the MachinePipeliner pass.
static cl::opt<bool>
EnableMachinePipeliner("riscv-enable-pipeliner",
cl::desc("Enable Machine Pipeliner for RISC-V"),
cl::init(false), cl::Hidden);

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
Expand Down Expand Up @@ -603,6 +608,9 @@ void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVInsertReadWriteCSRPass());
addPass(createRISCVInsertWriteVXRMPass());
addPass(createRISCVLandingPadSetupPass());

if (TM->getOptLevel() != CodeGenOptLevel::None && EnableMachinePipeliner)
addPass(&MachinePipelinerID);
}

void RISCVPassConfig::addFastRegAlloc() {
Expand Down
109 changes: 109 additions & 0 deletions llvm/test/CodeGen/RISCV/machine-pipeliner.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv64 -mcpu=sifive-p670 -O3 -verify-machineinstrs -riscv-enable-pipeliner=false < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOT-PIPELINED
; RUN: llc -mtriple=riscv64 -mcpu=sifive-p670 -O3 -verify-machineinstrs -riscv-enable-pipeliner=true < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-PIPELINED

; This loop must not be pipelined: one operand of the loop branch is defined
; by a PHI, which analyzeLoopForPipelining() rejects. Both RUN lines therefore
; share the same CHECK lines below.
define i32 @test_phi() {
; CHECK-LABEL: test_phi:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: .LBB0_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: sh a0, 0(zero)
; CHECK-NEXT: bnez a1, .LBB0_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: ret
entry:
br label %for.body

for.cond.cleanup: ; preds = %for.body
ret i32 0

for.body: ; preds = %for.body, %entry
%indvars.iv1 = phi i64 [ 0, %entry ], [ 1, %for.body ]
store i16 1, ptr null, align 4
%exitcond.not.31 = icmp eq i64 %indvars.iv1, 0
br i1 %exitcond.not.31, label %for.cond.cleanup, label %for.body
}

; A counted loop that loads a word, adds 1 and stores the result. Note that,
; despite the value names, the load uses %out.addr.next and the store uses
; %in.addr.next. With -riscv-enable-pipeliner=false the loop is a single block
; (CHECK-NOT-PIPELINED); with -riscv-enable-pipeliner=true the expected output
; has extra straight-line code before and after the loop (CHECK-PIPELINED).
define void @test_pipelined_1(ptr noalias %in, ptr noalias %out, i32 signext %cnt) {
; CHECK-NOT-PIPELINED-LABEL: test_pipelined_1:
; CHECK-NOT-PIPELINED: # %bb.0: # %entry
; CHECK-NOT-PIPELINED-NEXT: blez a2, .LBB1_3
; CHECK-NOT-PIPELINED-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NOT-PIPELINED-NEXT: addi a2, a2, -1
; CHECK-NOT-PIPELINED-NEXT: sh2add.uw a2, a2, a1
; CHECK-NOT-PIPELINED-NEXT: addi a2, a2, 4
; CHECK-NOT-PIPELINED-NEXT: .LBB1_2: # %for.body
; CHECK-NOT-PIPELINED-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NOT-PIPELINED-NEXT: lw a3, 0(a1)
; CHECK-NOT-PIPELINED-NEXT: addi a1, a1, 4
; CHECK-NOT-PIPELINED-NEXT: addi a3, a3, 1
; CHECK-NOT-PIPELINED-NEXT: sw a3, 0(a0)
; CHECK-NOT-PIPELINED-NEXT: addi a0, a0, 4
; CHECK-NOT-PIPELINED-NEXT: bne a1, a2, .LBB1_2
; CHECK-NOT-PIPELINED-NEXT: .LBB1_3: # %for.end
; CHECK-NOT-PIPELINED-NEXT: ret
;
; CHECK-PIPELINED-LABEL: test_pipelined_1:
; CHECK-PIPELINED: # %bb.0: # %entry
; CHECK-PIPELINED-NEXT: blez a2, .LBB1_6
; CHECK-PIPELINED-NEXT: # %bb.1: # %for.body.preheader
; CHECK-PIPELINED-NEXT: lw a4, 0(a1)
; CHECK-PIPELINED-NEXT: addi a2, a2, -1
; CHECK-PIPELINED-NEXT: sh2add.uw a6, a2, a1
; CHECK-PIPELINED-NEXT: addi a2, a0, 4
; CHECK-PIPELINED-NEXT: addi a1, a1, 4
; CHECK-PIPELINED-NEXT: addi a6, a6, 4
; CHECK-PIPELINED-NEXT: beq a1, a6, .LBB1_5
; CHECK-PIPELINED-NEXT: # %bb.2: # %for.body
; CHECK-PIPELINED-NEXT: lw a5, 0(a1)
; CHECK-PIPELINED-NEXT: addi a3, a2, 4
; CHECK-PIPELINED-NEXT: addi a4, a4, 1
; CHECK-PIPELINED-NEXT: addi a1, a1, 4
; CHECK-PIPELINED-NEXT: beq a1, a6, .LBB1_4
; CHECK-PIPELINED-NEXT: .LBB1_3: # %for.body
; CHECK-PIPELINED-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-PIPELINED-NEXT: sw a4, 0(a0)
; CHECK-PIPELINED-NEXT: mv a4, a5
; CHECK-PIPELINED-NEXT: lw a5, 0(a1)
; CHECK-PIPELINED-NEXT: mv a0, a2
; CHECK-PIPELINED-NEXT: mv a2, a3
; CHECK-PIPELINED-NEXT: addi a3, a3, 4
; CHECK-PIPELINED-NEXT: addi a4, a4, 1
; CHECK-PIPELINED-NEXT: addi a1, a1, 4
; CHECK-PIPELINED-NEXT: bne a1, a6, .LBB1_3
; CHECK-PIPELINED-NEXT: .LBB1_4:
; CHECK-PIPELINED-NEXT: sw a4, 0(a0)
; CHECK-PIPELINED-NEXT: mv a0, a2
; CHECK-PIPELINED-NEXT: mv a4, a5
; CHECK-PIPELINED-NEXT: .LBB1_5:
; CHECK-PIPELINED-NEXT: addi a4, a4, 1
; CHECK-PIPELINED-NEXT: sw a4, 0(a0)
; CHECK-PIPELINED-NEXT: .LBB1_6: # %for.end
; CHECK-PIPELINED-NEXT: ret
entry:
%cmp = icmp sgt i32 %cnt, 0
br i1 %cmp, label %for.body, label %for.end

for.body: ; preds = %entry, %for.body
%inc.next = phi i32 [ %inc, %for.body ], [ 0, %entry ]
%in.addr.next = phi ptr [ %incdec.in, %for.body ], [ %in, %entry ]
%out.addr.next = phi ptr [ %incdec.out, %for.body ], [ %out, %entry ]
%0 = load i32, ptr %out.addr.next, align 4
%1 = add i32 %0, 1
store i32 %1, ptr %in.addr.next, align 4
%incdec.in = getelementptr inbounds i8, ptr %in.addr.next, i64 4
%incdec.out = getelementptr inbounds i8, ptr %out.addr.next, i64 4
%inc = add nuw nsw i32 %inc.next, 1
%exitcond.not = icmp eq i32 %inc, %cnt
br i1 %exitcond.not, label %for.end, label %for.body

for.end: ; preds = %for.body, %entry
ret void
}
Loading