Skip to content

Commit 7a89674

Browse files
committed
Fix
1 parent 515946b commit 7a89674

File tree

2 files changed

+71
-7
lines changed

2 files changed

+71
-7
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

+17-7
Original file line numberDiff line numberDiff line change
@@ -2482,15 +2482,25 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
24822482
// Diff view of SelectADDRsi_imp: rows whose line marker ends in "-" are the
// body removed by this commit, rows whose marker ends in "+" are the body
// that replaces it, and rows with two line numbers are unchanged context.
//
// Tries to match Addr as an add-like node whose second operand is a constant
// and whose first operand (possibly after peeling further such add-like
// nodes) is accepted by SelectDirectAddr. On a match, Base is the value
// SelectDirectAddr wrote, Offset is a target constant of type VT (with
// OpNode's SDLoc) holding the constant part, and true is returned. Returns
// false when no such shape is found.
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
24832483
SDValue &Base, SDValue &Offset,
24842484
MVT VT) {
// Removed body: looked through exactly one add-like node, so the first
// operand itself had to satisfy SelectDirectAddr.
2485-
if (isAddLike(Addr)) {
2486-
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2487-
SDValue base = Addr.getOperand(0);
2488-
if (SelectDirectAddr(base, Base)) {
2489-
Offset =
2490-
CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), VT);
2491-
return true;
// Added body: a self-recursive helper that walks down nested add-like nodes,
// summing their constant second operands. It yields the running total once
// SelectDirectAddr accepts the remaining first operand, or std::nullopt if
// the walk reaches a node that is not "add-like with a constant operand".
// It is stored in a std::function so the lambda can call itself by name; the
// lambda's Addr parameter shadows the enclosing function's Addr.
2485+
std::function<std::optional<uint64_t>(SDValue, uint64_t)>
2486+
FindRootAddressAndTotalOffset =
2487+
[&](SDValue Addr,
2488+
uint64_t AccumulatedOffset) -> std::optional<uint64_t> {
2489+
if (isAddLike(Addr)) {
2490+
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2491+
SDValue PossibleBaseAddr = Addr.getOperand(0);
// NOTE(review): each constant is zero-extended before being summed into a
// uint64_t; confirm this is the intended treatment for negative offsets.
2492+
AccumulatedOffset += CN->getZExtValue();
// Base is written by SelectDirectAddr through the by-reference capture.
2493+
if (SelectDirectAddr(PossibleBaseAddr, Base))
2494+
return AccumulatedOffset;
// Not a direct address yet: keep peeling, carrying the offset summed so far.
2495+
return FindRootAddressAndTotalOffset(PossibleBaseAddr,
2496+
AccumulatedOffset);
24922497
}
24932498
}
// Reached a node that is not add-like with a constant second operand.
2499+
return std::nullopt;
2500+
};
// Start the walk at the incoming address with a zero running offset.
2501+
if (auto AccumulatedOffset = FindRootAddressAndTotalOffset(Addr, 0)) {
2502+
Offset = CurDAG->getTargetConstant(*AccumulatedOffset, SDLoc(OpNode), VT);
2503+
return true;
24942504
}
24952505
return false;
24962506
}

llvm/test/CodeGen/NVPTX/param-add.ll

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -march=nvptx64 --debug-counter=dagcombine=0 | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
4+
5+
; REQUIRES: asserts
6+
; An assertions-enabled build is required for --debug-counter=dagcombine=0 to
7+
; have the intended effect of disabling DAG combines, which exposes the bug.
8+
; When combines are enabled, the bug does not occur.
9+
10+
%struct.1float = type <{ [1 x float] }>
11+
12+
declare i32 @callee(%struct.1float %a)
13+
14+
; Forwards the by-value %struct.1float argument %data (declared
; alignstack(32)) to @callee and returns the i32 result. The CHECK lines
; below expect the incoming parameter to be read with four ld.param.u8 loads
; at test_param_0+0..+3 and the outgoing argument to be written with four
; st.param.b8 stores at param0+0..+3.
define i32 @test(%struct.1float alignstack(32) %data) {
15+
; CHECK-LABEL: test(
16+
; CHECK: {
17+
; CHECK-NEXT: .reg .b32 %r<18>;
18+
; CHECK-NEXT: .reg .f32 %f<2>;
19+
; CHECK-EMPTY:
20+
; CHECK-NEXT: // %bb.0:
21+
; CHECK-NEXT: ld.param.u8 %r1, [test_param_0+1];
22+
; CHECK-NEXT: shl.b32 %r2, %r1, 8;
23+
; CHECK-NEXT: ld.param.u8 %r3, [test_param_0];
24+
; CHECK-NEXT: or.b32 %r4, %r2, %r3;
25+
; CHECK-NEXT: ld.param.u8 %r5, [test_param_0+3];
26+
; CHECK-NEXT: shl.b32 %r6, %r5, 8;
27+
; CHECK-NEXT: ld.param.u8 %r7, [test_param_0+2];
28+
; CHECK-NEXT: or.b32 %r8, %r6, %r7;
29+
; CHECK-NEXT: shl.b32 %r9, %r8, 16;
30+
; CHECK-NEXT: or.b32 %r17, %r9, %r4;
31+
; CHECK-NEXT: mov.b32 %f1, %r17;
32+
; CHECK-NEXT: shr.u32 %r12, %r17, 8;
33+
; CHECK-NEXT: shr.u32 %r13, %r17, 16;
34+
; CHECK-NEXT: shr.u32 %r14, %r17, 24;
35+
; CHECK-NEXT: { // callseq 0, 0
36+
; CHECK-NEXT: .param .align 1 .b8 param0[4];
37+
; CHECK-NEXT: st.param.b8 [param0], %r17;
38+
; CHECK-NEXT: st.param.b8 [param0+1], %r12;
39+
; CHECK-NEXT: st.param.b8 [param0+2], %r13;
40+
; CHECK-NEXT: st.param.b8 [param0+3], %r14;
41+
; CHECK-NEXT: .param .b32 retval0;
42+
; CHECK-NEXT: call.uni (retval0),
43+
; CHECK-NEXT: callee,
44+
; CHECK-NEXT: (
45+
; CHECK-NEXT: param0
46+
; CHECK-NEXT: );
47+
; CHECK-NEXT: ld.param.b32 %r15, [retval0];
48+
; CHECK-NEXT: } // callseq 0
49+
; CHECK-NEXT: st.param.b32 [func_retval0], %r15;
50+
; CHECK-NEXT: ret;
51+
52+
%1 = call i32 @callee(%struct.1float %data)
53+
ret i32 %1
54+
}

0 commit comments

Comments
 (0)