Skip to content

Commit 3c9c94a

Browse files
committed
Revert "[SLP]Fix mask generation after cost estimation"
This reverts commit 547ba97 to fix the buildbot failures reported at https://lab.llvm.org/buildbot/#/builders/123/builds/11370 and https://lab.llvm.org/buildbot/#/builders/133/builds/9492.
1 parent c189df8 commit 3c9c94a

File tree

2 files changed

+10
-23
lines changed

2 files changed

+10
-23
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+8-21
Original file line number | Diff line number | Diff line change
@@ -13443,15 +13443,14 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1344313443
for_each(SubMask, [&](int &Idx) {
1344413444
if (Idx == PoisonMaskElem)
1344513445
return;
13446-
Idx = (Idx % VF) - ((MinElement / NewVF) * NewVF) +
13446+
Idx = (Idx % VF) - (MinElement % VF) +
1344713447
(Idx >= static_cast<int>(VF) ? NewVF : 0);
1344813448
});
13449-
} else {
13450-
NewVF = VF;
13449+
VF = NewVF;
1345113450
}
1345213451

1345313452
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
13454-
auto *VecTy = getWidenedType(VL.front()->getType(), NewVF);
13453+
auto *VecTy = getWidenedType(VL.front()->getType(), VF);
1345513454
auto *MaskVecTy = getWidenedType(VL.front()->getType(), SubMask.size());
1345613455
auto GetShuffleCost = [&,
1345713456
&TTI = *TTI](ArrayRef<int> Mask,
@@ -13476,7 +13475,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1347613475
APInt DemandedElts = APInt::getAllOnes(SubMask.size());
1347713476
bool IsIdentity = true;
1347813477
for (auto [I, Idx] : enumerate(FirstMask)) {
13479-
if (Idx >= static_cast<int>(NewVF)) {
13478+
if (Idx >= static_cast<int>(VF)) {
1348013479
Idx = PoisonMaskElem;
1348113480
} else {
1348213481
DemandedElts.clearBit(I);
@@ -13499,12 +13498,12 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1349913498
APInt DemandedElts = APInt::getAllOnes(SubMask.size());
1350013499
bool IsIdentity = true;
1350113500
for (auto [I, Idx] : enumerate(SecondMask)) {
13502-
if (Idx < static_cast<int>(NewVF) && Idx >= 0) {
13501+
if (Idx < static_cast<int>(VF) && Idx >= 0) {
1350313502
Idx = PoisonMaskElem;
1350413503
} else {
1350513504
DemandedElts.clearBit(I);
1350613505
if (Idx != PoisonMaskElem) {
13507-
Idx -= NewVF;
13506+
Idx -= VF;
1350813507
IsIdentity &= static_cast<int>(I) == Idx;
1350913508
}
1351013509
}
@@ -13524,24 +13523,12 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1352413523
/*Extract=*/false, CostKind);
1352513524
const TreeEntry *BestEntry = nullptr;
1352613525
if (FirstShuffleCost < ShuffleCost) {
13527-
std::for_each(std::next(Mask.begin(), Part * VL.size()),
13528-
std::next(Mask.begin(), (Part + 1) * VL.size()),
13529-
[&](int &Idx) {
13530-
if (Idx >= static_cast<int>(VF))
13531-
Idx = PoisonMaskElem;
13532-
});
13526+
copy(FirstMask, std::next(Mask.begin(), Part * VL.size()));
1353313527
BestEntry = Entries.front();
1353413528
ShuffleCost = FirstShuffleCost;
1353513529
}
1353613530
if (SecondShuffleCost < ShuffleCost) {
13537-
std::for_each(std::next(Mask.begin(), Part * VL.size()),
13538-
std::next(Mask.begin(), (Part + 1) * VL.size()),
13539-
[&](int &Idx) {
13540-
if (Idx < static_cast<int>(VF))
13541-
Idx = PoisonMaskElem;
13542-
else
13543-
Idx -= VF;
13544-
});
13531+
copy(SecondMask, std::next(Mask.begin(), Part * VL.size()));
1354513532
BestEntry = Entries[1];
1354613533
ShuffleCost = SecondShuffleCost;
1354713534
}

llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll

+2-2
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,8 @@ define i16 @test(i16 %v1, i16 %v2) {
1010
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP0]], [[TMP1]]
1111
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i16> [[TMP0]], [[TMP1]]
1212
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
13-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 poison, i32 3>
14-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[V1]], i32 0
13+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <2 x i32> <i32 0, i32 poison>
14+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[V2]], i32 1
1515
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
1616
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
1717
; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i16> [[TMP4]], zeroinitializer

0 commit comments

Comments
 (0)