Skip to content

Commit ec3525f

Browse files
authored
[NVPTX] Attempt to load params using symbol addition node directly (#119935)
During instruction selection of load instructions, transform loads of [register+offset] into [symbol+offset] when the register value is the result of one or more ADD instructions that add constants to a symbol. This allows the removal of any ADDs of the symbol that would otherwise not be combined with the load to create a ld.param. This is normally not an issue when DAG combines are enabled, as any extra ADDs would be folded. However, when DAG combines are disabled, there may be cases where an ADD of a symbol is consumed by multiple other nodes and is retained in the generated code as a PTX `add` instruction that uses the symbol as an operand, which is illegal PTX.
1 parent 98e2328 commit ec3525f

File tree

2 files changed

+71
-7
lines changed

2 files changed

+71
-7
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

+17-7
Original file line numberDiff line numberDiff line change
@@ -2482,15 +2482,25 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
24822482
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
24832483
SDValue &Base, SDValue &Offset,
24842484
MVT VT) {
2485-
if (isAddLike(Addr)) {
2486-
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2487-
SDValue base = Addr.getOperand(0);
2488-
if (SelectDirectAddr(base, Base)) {
2489-
Offset =
2490-
CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), VT);
2491-
return true;
2485+
std::function<std::optional<uint64_t>(SDValue, uint64_t)>
2486+
FindRootAddressAndTotalOffset =
2487+
[&](SDValue Addr,
2488+
uint64_t AccumulatedOffset) -> std::optional<uint64_t> {
2489+
if (isAddLike(Addr)) {
2490+
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2491+
SDValue PossibleBaseAddr = Addr.getOperand(0);
2492+
AccumulatedOffset += CN->getZExtValue();
2493+
if (SelectDirectAddr(PossibleBaseAddr, Base))
2494+
return AccumulatedOffset;
2495+
return FindRootAddressAndTotalOffset(PossibleBaseAddr,
2496+
AccumulatedOffset);
24922497
}
24932498
}
2499+
return std::nullopt;
2500+
};
2501+
if (auto AccumulatedOffset = FindRootAddressAndTotalOffset(Addr, 0)) {
2502+
Offset = CurDAG->getTargetConstant(*AccumulatedOffset, SDLoc(OpNode), VT);
2503+
return true;
24942504
}
24952505
return false;
24962506
}

llvm/test/CodeGen/NVPTX/param-add.ll

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -march=nvptx64 --debug-counter=dagcombine=0 | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 --debug-counter=dagcombine=0 | %ptxas-verify %}
4+
5+
; REQUIRES: asserts
6+
; asserts are required for --debug-counter=dagcombine=0 to have the intended
7+
; effect of disabling DAG combines, which exposes the bug. When combines are
8+
; enabled the bug does not occur.
9+
10+
%struct.1float = type <{ [1 x float] }>
11+
12+
declare i32 @callee(%struct.1float %a)
13+
14+
define i32 @test(%struct.1float alignstack(32) %data) {
15+
; CHECK-LABEL: test(
16+
; CHECK: {
17+
; CHECK-NEXT: .reg .b32 %r<18>;
18+
; CHECK-NEXT: .reg .f32 %f<2>;
19+
; CHECK-EMPTY:
20+
; CHECK-NEXT: // %bb.0:
21+
; CHECK-NEXT: ld.param.u8 %r1, [test_param_0+1];
22+
; CHECK-NEXT: shl.b32 %r2, %r1, 8;
23+
; CHECK-NEXT: ld.param.u8 %r3, [test_param_0];
24+
; CHECK-NEXT: or.b32 %r4, %r2, %r3;
25+
; CHECK-NEXT: ld.param.u8 %r5, [test_param_0+3];
26+
; CHECK-NEXT: shl.b32 %r6, %r5, 8;
27+
; CHECK-NEXT: ld.param.u8 %r7, [test_param_0+2];
28+
; CHECK-NEXT: or.b32 %r8, %r6, %r7;
29+
; CHECK-NEXT: shl.b32 %r9, %r8, 16;
30+
; CHECK-NEXT: or.b32 %r17, %r9, %r4;
31+
; CHECK-NEXT: mov.b32 %f1, %r17;
32+
; CHECK-NEXT: shr.u32 %r12, %r17, 8;
33+
; CHECK-NEXT: shr.u32 %r13, %r17, 16;
34+
; CHECK-NEXT: shr.u32 %r14, %r17, 24;
35+
; CHECK-NEXT: { // callseq 0, 0
36+
; CHECK-NEXT: .param .align 1 .b8 param0[4];
37+
; CHECK-NEXT: st.param.b8 [param0], %r17;
38+
; CHECK-NEXT: st.param.b8 [param0+1], %r12;
39+
; CHECK-NEXT: st.param.b8 [param0+2], %r13;
40+
; CHECK-NEXT: st.param.b8 [param0+3], %r14;
41+
; CHECK-NEXT: .param .b32 retval0;
42+
; CHECK-NEXT: call.uni (retval0),
43+
; CHECK-NEXT: callee,
44+
; CHECK-NEXT: (
45+
; CHECK-NEXT: param0
46+
; CHECK-NEXT: );
47+
; CHECK-NEXT: ld.param.b32 %r15, [retval0];
48+
; CHECK-NEXT: } // callseq 0
49+
; CHECK-NEXT: st.param.b32 [func_retval0], %r15;
50+
; CHECK-NEXT: ret;
51+
52+
%1 = call i32 @callee(%struct.1float %data)
53+
ret i32 %1
54+
}

0 commit comments

Comments
 (0)