From 62e49e7f2c6ef644ed59f11e350ec52d7b186217 Mon Sep 17 00:00:00 2001
From: wangpc
Date: Mon, 25 Nov 2024 16:54:44 +0800
Subject: [PATCH] [RISCV] Add software pipeliner support

This patch adds basic support for `MachinePipeliner` and disables it
by default.

The functionality should be OK and all llvm-test-suite tests have
passed.
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp     |  91 +++++++++++++++
 llvm/lib/Target/RISCV/RISCVInstrInfo.h       |   3 +
 llvm/lib/Target/RISCV/RISCVSubtarget.cpp     |   6 +
 llvm/lib/Target/RISCV/RISCVSubtarget.h       |   4 +
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp |   8 ++
 llvm/test/CodeGen/RISCV/machine-pipeliner.ll | 111 +++++++++++++++++++
 6 files changed, 223 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/machine-pipeliner.ll

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 7e0063589b6f4c..0af8161a307abd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -4248,3 +4248,94 @@ bool RISCV::isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
     return false;
   return LHS.getImm() <= RHS.getImm();
 }
+
+namespace {
+/// Loop description handed to the MachinePipeliner for a RISC-V loop.
+///
+/// LHS and RHS are the instructions defining the two register operands of the
+/// loop branch (null if an operand has no such definition). Cond is the branch
+/// condition, normalized so that it holds when control leaves the loop.
+class RISCVPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
+  const MachineInstr *LHS;
+  const MachineInstr *RHS;
+  SmallVector<MachineOperand, 3> Cond;
+
+public:
+  RISCVPipelinerLoopInfo(const MachineInstr *LHS, const MachineInstr *RHS,
+                         const SmallVectorImpl<MachineOperand> &Cond)
+      : LHS(LHS), RHS(RHS), Cond(Cond.begin(), Cond.end()) {}
+
+  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
+    // Make the instructions for loop control be placed in stage 0.
+    // The predecessors of LHS/RHS are considered by the caller.
+    if (LHS && MI == LHS)
+      return true;
+    if (RHS && MI == RHS)
+      return true;
+    return false;
+  }
+
+  std::optional<bool> createTripCountGreaterCondition(
+      int TC, MachineBasicBlock &MBB,
+      SmallVectorImpl<MachineOperand> &CondParam) override {
+    // A branch instruction will be inserted as "if (Cond) goto epilogue".
+    // Cond is normalized for such use.
+    // The predecessors of the branch are assumed to have already been inserted.
+    CondParam = Cond;
+    return {};
+  }
+
+  void setPreheader(MachineBasicBlock *NewPreheader) override {}
+
+  void adjustTripCount(int TripCountAdjust) override {}
+
+  void disposed() override {}
+};
+} // namespace
+
+/// Build the pipeliner loop info for the single-basic-block loop \p LoopBB.
+///
+/// Returns nullptr if analyzeBranch() returns true, if both branch targets
+/// are \p LoopBB, if no false target is reported, or if a branch operand is
+/// defined by a PHI.
+std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+RISCVInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  SmallVector<MachineOperand, 4> Cond;
+  if (analyzeBranch(*LoopBB, TBB, FBB, Cond, /*AllowModify=*/false))
+    return nullptr;
+
+  // Infinite loops are not supported
+  if (TBB == LoopBB && FBB == LoopBB)
+    return nullptr;
+
+  // Must be conditional branch
+  if (FBB == nullptr)
+    return nullptr;
+
+  assert((TBB == LoopBB || FBB == LoopBB) &&
+         "The Loop must be a single-basic-block loop");
+
+  // Normalization for createTripCountGreaterCondition()
+  if (TBB == LoopBB)
+    reverseBranchCondition(Cond);
+
+  const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
+  auto FindRegDef = [&MRI](const MachineOperand &Op) -> const MachineInstr * {
+    if (!Op.isReg())
+      return nullptr;
+    Register Reg = Op.getReg();
+    if (!Reg.isVirtual())
+      return nullptr;
+    return MRI.getVRegDef(Reg);
+  };
+
+  const MachineInstr *LHS = FindRegDef(Cond[1]);
+  const MachineInstr *RHS = FindRegDef(Cond[2]);
+  if (LHS && LHS->isPHI())
+    return nullptr;
+  if (RHS && RHS->isPHI())
+    return nullptr;
+
+  return std::make_unique<RISCVPipelinerLoopInfo>(LHS, RHS, Cond);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 005cba5d35610e..7e8bcd451a8ef8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -298,6 +298,9 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
 
   unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
 
+  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+  analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
+
 protected:
   const RISCVSubtarget &STI;
 
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 90131d82534b1c..6e212dc58e6ddd 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -194,6 +194,12 @@ bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
 
 bool RISCVSubtarget::enableSubRegLiveness() const { return true; }
 
+/// Allow the MachinePipeliner only when the scheduling model of this subtarget
+/// has an instruction scheduling model (hasInstrSchedModel()).
+bool RISCVSubtarget::enableMachinePipeliner() const {
+  return getSchedModel().hasInstrSchedModel();
+}
+
 /// Enable use of alias analysis during code generation (during MI
 /// scheduling, DAGCombine, etc.).
 bool RISCVSubtarget::useAA() const { return UseAA; }
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 096d696c71f8f5..87d508c3941737 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -324,6 +324,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
 
   bool enableSubRegLiveness() const override;
 
+  bool enableMachinePipeliner() const override;
+
+  bool useDFAforSMS() const override { return false; }
+
   bool useAA() const override;
 
   unsigned getCacheLineSize() const override {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 0b8407943a9078..f6ccbfbe217df6 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -112,6 +112,11 @@ static cl::opt<bool> DisableVectorMaskMutation(
     cl::desc("Disable the vector mask scheduling mutation"), cl::init(false),
     cl::Hidden);
 
+static cl::opt<bool>
+    EnableMachinePipeliner("riscv-enable-pipeliner",
+                           cl::desc("Enable Machine Pipeliner for RISC-V"),
+                           cl::init(false), cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
   RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -603,6 +608,9 @@ void RISCVPassConfig::addPreRegAlloc() {
   addPass(createRISCVInsertReadWriteCSRPass());
   addPass(createRISCVInsertWriteVXRMPass());
   addPass(createRISCVLandingPadSetupPass());
+
+  if (TM->getOptLevel() != CodeGenOptLevel::None && EnableMachinePipeliner)
+    addPass(&MachinePipelinerID);
 }
 
 void RISCVPassConfig::addFastRegAlloc() {
diff --git a/llvm/test/CodeGen/RISCV/machine-pipeliner.ll b/llvm/test/CodeGen/RISCV/machine-pipeliner.ll
new file mode 100644
index 00000000000000..d2500985766874
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/machine-pipeliner.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-p670 -O3 -verify-machineinstrs -riscv-enable-pipeliner=false < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-NOT-PIPELINED
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-p670 -O3 -verify-machineinstrs -riscv-enable-pipeliner=true < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-PIPELINED
+
+; We shouldn't pipeline this loop as one operand of branch is a PHI.
+define i32 @test_phi() {
+; CHECK-LABEL: test_phi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:  .LBB0_1: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    sh a0, 0(zero)
+; CHECK-NEXT:    bnez a1, .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    ret
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i32 0
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv1 = phi i64 [ 0, %entry ], [ 1, %for.body ]
+  store i16 1, ptr null, align 4
+  %exitcond.not.31 = icmp eq i64 %indvars.iv1, 0
+  br i1 %exitcond.not.31, label %for.cond.cleanup, label %for.body
+}
+
+; This load/add/store loop is expected to be pipelined when
+; -riscv-enable-pipeliner=true (see the CHECK-PIPELINED lines).
+define void @test_pipelined_1(ptr noalias %in, ptr noalias %out, i32 signext %cnt) {
+; CHECK-NOT-PIPELINED-LABEL: test_pipelined_1:
+; CHECK-NOT-PIPELINED:       # %bb.0: # %entry
+; CHECK-NOT-PIPELINED-NEXT:    blez a2, .LBB1_3
+; CHECK-NOT-PIPELINED-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NOT-PIPELINED-NEXT:    addi a2, a2, -1
+; CHECK-NOT-PIPELINED-NEXT:    sh2add.uw a2, a2, a1
+; CHECK-NOT-PIPELINED-NEXT:    addi a2, a2, 4
+; CHECK-NOT-PIPELINED-NEXT:  .LBB1_2: # %for.body
+; CHECK-NOT-PIPELINED-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NOT-PIPELINED-NEXT:    lw a3, 0(a1)
+; CHECK-NOT-PIPELINED-NEXT:    addi a1, a1, 4
+; CHECK-NOT-PIPELINED-NEXT:    addi a3, a3, 1
+; CHECK-NOT-PIPELINED-NEXT:    sw a3, 0(a0)
+; CHECK-NOT-PIPELINED-NEXT:    addi a0, a0, 4
+; CHECK-NOT-PIPELINED-NEXT:    bne a1, a2, .LBB1_2
+; CHECK-NOT-PIPELINED-NEXT:  .LBB1_3: # %for.end
+; CHECK-NOT-PIPELINED-NEXT:    ret
+;
+; CHECK-PIPELINED-LABEL: test_pipelined_1:
+; CHECK-PIPELINED:       # %bb.0: # %entry
+; CHECK-PIPELINED-NEXT:    blez a2, .LBB1_6
+; CHECK-PIPELINED-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-PIPELINED-NEXT:    lw a4, 0(a1)
+; CHECK-PIPELINED-NEXT:    addi a2, a2, -1
+; CHECK-PIPELINED-NEXT:    sh2add.uw a6, a2, a1
+; CHECK-PIPELINED-NEXT:    addi a2, a0, 4
+; CHECK-PIPELINED-NEXT:    addi a1, a1, 4
+; CHECK-PIPELINED-NEXT:    addi a6, a6, 4
+; CHECK-PIPELINED-NEXT:    beq a1, a6, .LBB1_5
+; CHECK-PIPELINED-NEXT:  # %bb.2: # %for.body
+; CHECK-PIPELINED-NEXT:    lw a5, 0(a1)
+; CHECK-PIPELINED-NEXT:    addi a3, a2, 4
+; CHECK-PIPELINED-NEXT:    addi a4, a4, 1
+; CHECK-PIPELINED-NEXT:    addi a1, a1, 4
+; CHECK-PIPELINED-NEXT:    beq a1, a6, .LBB1_4
+; CHECK-PIPELINED-NEXT:  .LBB1_3: # %for.body
+; CHECK-PIPELINED-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-PIPELINED-NEXT:    sw a4, 0(a0)
+; CHECK-PIPELINED-NEXT:    mv a4, a5
+; CHECK-PIPELINED-NEXT:    lw a5, 0(a1)
+; CHECK-PIPELINED-NEXT:    mv a0, a2
+; CHECK-PIPELINED-NEXT:    mv a2, a3
+; CHECK-PIPELINED-NEXT:    addi a3, a3, 4
+; CHECK-PIPELINED-NEXT:    addi a4, a4, 1
+; CHECK-PIPELINED-NEXT:    addi a1, a1, 4
+; CHECK-PIPELINED-NEXT:    bne a1, a6, .LBB1_3
+; CHECK-PIPELINED-NEXT:  .LBB1_4:
+; CHECK-PIPELINED-NEXT:    sw a4, 0(a0)
+; CHECK-PIPELINED-NEXT:    mv a0, a2
+; CHECK-PIPELINED-NEXT:    mv a4, a5
+; CHECK-PIPELINED-NEXT:  .LBB1_5:
+; CHECK-PIPELINED-NEXT:    addi a4, a4, 1
+; CHECK-PIPELINED-NEXT:    sw a4, 0(a0)
+; CHECK-PIPELINED-NEXT:  .LBB1_6: # %for.end
+; CHECK-PIPELINED-NEXT:    ret
+entry:
+  %cmp = icmp sgt i32 %cnt, 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %inc.next = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %in.addr.next = phi ptr [ %incdec.in, %for.body ], [ %in, %entry ]
+  %out.addr.next = phi ptr [ %incdec.out, %for.body ], [ %out, %entry ]
+  %0 = load i32, ptr %out.addr.next, align 4
+  %1 = add i32 %0, 1
+  store i32 %1, ptr %in.addr.next, align 4
+  %incdec.in = getelementptr inbounds i8, ptr %in.addr.next, i64 4
+  %incdec.out = getelementptr inbounds i8, ptr %out.addr.next, i64 4
+  %inc = add nuw nsw i32 %inc.next, 1
+  %exitcond.not = icmp eq i32 %inc, %cnt
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}