Commit 53be850

Also extract reverse mask
1 parent 7c2493d commit 53be850
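
In short: the Reverse flag is removed from VPWidenMemoryRecipe and its load/store subclasses. Reversal is instead represented explicitly in the VPlan: tryToWidenMemory wraps the mask of a reversed access in a VPInstruction::Reverse, just as the stored value already is, and downstream code recovers reversedness by pattern-matching that instruction rather than querying isReverse().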

9 files changed: +133, -363 lines

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 12 additions & 3 deletions
@@ -6941,7 +6941,13 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
       return true;

     if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(&R)) {
-      if (MemR->isReverse()) {
+      bool IsReverse = CostCtx.CM.getWideningDecision(UI, VF) ==
+                       LoopVectorizationCostModel::CM_Widen_Reverse;
+      if (IsReverse) {
+        // The legacy model has not computed the cost of the reverse mask.
+        if (CostCtx.CM.Legal->isMaskRequired(UI))
+          return true;
+
         // If the stored value of a reverse store is invariant, LICM will
         // hoist the reverse operation to the preheader. In this case, the
         // result of the VPlan-based cost model will diverge from that of

@@ -7685,9 +7691,12 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
     Ptr = VectorPtr;
   }

+  if (Reverse && Mask)
+    Mask = Builder.createNaryOp(VPInstruction::Reverse, Mask, I->getDebugLoc());
+
   if (auto *Load = dyn_cast<LoadInst>(I)) {
     auto *LoadR =
-        new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+        new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive,
                               VPIRMetadata(*Load, LVer), Load->getDebugLoc());
     if (Reverse) {
       Builder.insert(LoadR);

@@ -7703,7 +7712,7 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
     StoredVal = Builder.createNaryOp(VPInstruction::Reverse, StoredVal,
                                      Store->getDebugLoc());
   return new VPWidenStoreRecipe(*Store, Ptr, StoredVal, Mask, Consecutive,
-                                Reverse, VPIRMetadata(*Store, LVer),
+                                VPIRMetadata(*Store, LVer),
                                 Store->getDebugLoc());
 }
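Two changes here: planContainsAdditionalSimplifications can no longer ask the recipe whether it is reversed, so it re-derives that from the legacy widening decision (CM_Widen_Reverse) and conservatively reports a simplification for masked reverse accesses, since the VPlan-based model now prices the explicit mask reverse that the legacy model never costed. And tryToWidenMemory reverses the mask up front. A condensed sketch of the resulting store path, assembled from the hunks above rather than quoted verbatim:

  // Reverse the mask (and the stored value) explicitly, then build a
  // recipe that no longer carries a Reverse flag.
  if (Reverse && Mask)
    Mask = Builder.createNaryOp(VPInstruction::Reverse, Mask, I->getDebugLoc());
  if (Reverse)
    StoredVal = Builder.createNaryOp(VPInstruction::Reverse, StoredVal,
                                     Store->getDebugLoc());
  return new VPWidenStoreRecipe(*Store, Ptr, StoredVal, Mask, Consecutive,
                                VPIRMetadata(*Store, LVer),
                                Store->getDebugLoc());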

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 11 additions & 22 deletions
@@ -3014,9 +3014,6 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
   /// Whether the accessed addresses are consecutive.
   bool Consecutive;

-  /// Whether the consecutive accessed addresses are in reverse order.
-  bool Reverse;
-
   /// Whether the memory access is masked.
   bool IsMasked = false;

@@ -3030,12 +3027,10 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,

   VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
                       std::initializer_list<VPValue *> Operands,
-                      bool Consecutive, bool Reverse,
-                      const VPIRMetadata &Metadata, DebugLoc DL)
+                      bool Consecutive, const VPIRMetadata &Metadata,
+                      DebugLoc DL)
       : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
-        Consecutive(Consecutive), Reverse(Reverse) {
-    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
-  }
+        Consecutive(Consecutive) {}

 public:
   VPWidenMemoryRecipe *clone() override {

@@ -3057,10 +3052,6 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
   /// Return whether the loaded-from / stored-to addresses are consecutive.
   bool isConsecutive() const { return Consecutive; }

-  /// Return whether the consecutive loaded/stored addresses are in reverse
-  /// order.
-  bool isReverse() const { return Reverse; }
-
   /// Return the address accessed by this recipe.
   VPValue *getAddr() const { return getOperand(0); }

@@ -3091,18 +3082,16 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
 struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
                                                    public VPValue {
   VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
-                    bool Consecutive, bool Reverse,
-                    const VPIRMetadata &Metadata, DebugLoc DL)
+                    bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
       : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
-                            Reverse, Metadata, DL),
+                            Metadata, DL),
         VPValue(this, &Load) {
     setMask(Mask);
   }

   VPWidenLoadRecipe *clone() override {
     return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
-                                 getMask(), Consecutive, Reverse, *this,
-                                 getDebugLoc());
+                                 getMask(), Consecutive, *this, getDebugLoc());
   }

   VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);

@@ -3133,7 +3122,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
   VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL,
                        VPValue *Mask)
       : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
-                            {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
+                            {Addr, &EVL}, L.isConsecutive(), L,
                             L.getDebugLoc()),
         VPValue(this, &getIngredient()) {
     setMask(Mask);

@@ -3171,17 +3160,17 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
 /// to store to and an optional mask.
 struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
   VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
-                     VPValue *Mask, bool Consecutive, bool Reverse,
+                     VPValue *Mask, bool Consecutive,
                      const VPIRMetadata &Metadata, DebugLoc DL)
       : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
-                            Consecutive, Reverse, Metadata, DL) {
+                            Consecutive, Metadata, DL) {
     setMask(Mask);
   }

   VPWidenStoreRecipe *clone() override {
     return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
                                   getStoredValue(), getMask(), Consecutive,
-                                  Reverse, *this, getDebugLoc());
+                                  *this, getDebugLoc());
   }

   VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);

@@ -3216,7 +3205,7 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
                        VPValue *Mask)
       : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
                             {Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
-                            S.isReverse(), S, S.getDebugLoc()) {
+                            S, S.getDebugLoc()) {
     setMask(Mask);
   }
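The header change is mechanical: the Reverse field, the (Consecutive || !Reverse) assertion, and the isReverse() accessor disappear, and every constructor in the VPWidenMemoryRecipe hierarchy drops its bool Reverse parameter. Any out-of-tree caller must update accordingly; a hypothetical call site after this change (Load, Addr, Mask, and DL stand for caller-provided values, not names from this patch):

  // Hypothetical example of the new, flag-free signature.
  auto *LoadR = new VPWidenLoadRecipe(Load, Addr, Mask, /*Consecutive=*/true,
                                      VPIRMetadata(Load), DL);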

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 17 additions & 41 deletions
@@ -2231,8 +2231,6 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
       return TTI::CastContextHint::None;
     if (!WidenMemoryRecipe->isConsecutive())
       return TTI::CastContextHint::GatherScatter;
-    if (WidenMemoryRecipe->isReverse())
-      return TTI::CastContextHint::Reversed;
     if (WidenMemoryRecipe->isMasked())
       return TTI::CastContextHint::Masked;
     return TTI::CastContextHint::Normal;

@@ -2241,6 +2239,7 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
   using namespace llvm::VPlanPatternMatch;
   VPValue *Operand = getOperand(0);
   TTI::CastContextHint CCH = TTI::CastContextHint::None;
+  bool IsReverse = false;
   // For Trunc/FPTrunc, get the context from the only user.
   if (Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) {
     auto GetOnlyUser = [](const VPSingleDefRecipe *R) -> VPRecipeBase * {

@@ -2250,8 +2249,10 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
     };

     if (VPRecipeBase *Recipe = GetOnlyUser(this)) {
-      if (match(Recipe, m_VPInstruction<VPInstruction::Reverse>(m_VPValue())))
+      if (match(Recipe, m_VPInstruction<VPInstruction::Reverse>(m_VPValue()))) {
         Recipe = GetOnlyUser(cast<VPInstruction>(Recipe));
+        IsReverse = true;
+      }
       if (Recipe)
         CCH = ComputeCCH(Recipe);
     }

@@ -2263,13 +2264,17 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
       CCH = TTI::CastContextHint::Normal;
     else if (auto *Recipe = Operand->getDefiningRecipe()) {
       VPValue *ReverseOp;
-      if (match(Recipe,
-                m_VPInstruction<VPInstruction::Reverse>(m_VPValue(ReverseOp))))
+      if (match(Recipe, m_VPInstruction<VPInstruction::Reverse>(
+                            m_VPValue(ReverseOp)))) {
         Recipe = ReverseOp->getDefiningRecipe();
+        IsReverse = true;
+      }
       if (Recipe)
         CCH = ComputeCCH(Recipe);
     }
   }
+  if (IsReverse && CCH != TTI::CastContextHint::None)
+    CCH = TTI::CastContextHint::Reversed;

   auto *SrcTy =
       cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));

@@ -3273,9 +3278,6 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
   // TODO: Using the original IR may not be accurate.
   // Currently, ARM will use the underlying IR to calculate gather/scatter
   // instruction cost.
-  assert(!Reverse &&
-         "Inconsecutive memory access should not have the order.");
-
   const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
   Type *PtrTy = Ptr->getType();

@@ -3312,13 +3314,8 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {

   auto &Builder = State.Builder;
   Value *Mask = nullptr;
-  if (auto *VPMask = getMask()) {
-    // Mask reversal is only needed for non-all-one (null) masks, as reverse
-    // of a null all-one mask is a null mask.
+  if (auto *VPMask = getMask())
     Mask = State.get(VPMask);
-    if (isReverse())
-      Mask = Builder.CreateVectorReverse(Mask, "reverse");
-  }

   Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);
   Value *NewLI;

@@ -3346,17 +3343,6 @@ void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif

-/// Use all-true mask for reverse rather than actual mask, as it avoids a
-/// dependence w/o affecting the result.
-static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
-                                     Value *EVL, const Twine &Name) {
-  VectorType *ValTy = cast<VectorType>(Operand->getType());
-  Value *AllTrueMask =
-      Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
-  return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
-                                 {Operand, AllTrueMask, EVL}, nullptr, Name);
-}
-
 void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   Type *ScalarDataTy = getLoadStoreType(&Ingredient);
   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);

@@ -3368,13 +3354,10 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   Value *EVL = State.get(getEVL(), VPLane(0));
   Value *Addr = State.get(getAddr(), !CreateGather);
   Value *Mask = nullptr;
-  if (VPValue *VPMask = getMask()) {
+  if (VPValue *VPMask = getMask())
     Mask = State.get(VPMask);
-    if (isReverse())
-      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
-  } else {
+  else
     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
-  }

   if (CreateGather) {
     NewLI =

@@ -3428,13 +3411,8 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
   auto &Builder = State.Builder;

   Value *Mask = nullptr;
-  if (auto *VPMask = getMask()) {
-    // Mask reversal is only needed for non-all-one (null) masks, as reverse
-    // of a null all-one mask is a null mask.
+  if (auto *VPMask = getMask())
     Mask = State.get(VPMask);
-    if (isReverse())
-      Mask = Builder.CreateVectorReverse(Mask, "reverse");
-  }

   Value *StoredVal = State.get(StoredVPValue);
   Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);

@@ -3467,13 +3445,11 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   Value *StoredVal = State.get(StoredValue);
   Value *EVL = State.get(getEVL(), VPLane(0));
   Value *Mask = nullptr;
-  if (VPValue *VPMask = getMask()) {
+  if (VPValue *VPMask = getMask())
     Mask = State.get(VPMask);
-    if (isReverse())
-      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
-  } else {
+  else
     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
-  }
+
   Value *Addr = State.get(getAddr(), !CreateScatter);
   if (CreateScatter) {
     NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
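With isReverse() gone, VPWidenCastRecipe::computeCost infers a Reversed cast context structurally: it sets IsReverse when a VPInstruction::Reverse either defines the cast's operand or is the cast's only user, and upgrades any non-None hint to TTI::CastContextHint::Reversed at the end. The execute() paths of the four memory recipes stop reversing the mask themselves, since the mask operand now already arrives reversed, which also lets the createReverseEVL helper be deleted.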

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 28 additions & 13 deletions
@@ -86,13 +86,13 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
     if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
       NewRecipe = new VPWidenLoadRecipe(
           *Load, Ingredient.getOperand(0), nullptr /*Mask*/,
-          false /*Consecutive*/, false /*Reverse*/, VPIRMetadata(*Load),
+          false /*Consecutive*/, VPIRMetadata(*Load),
           Ingredient.getDebugLoc());
     } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
       NewRecipe = new VPWidenStoreRecipe(
           *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
-          nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
-          VPIRMetadata(*Store), Ingredient.getDebugLoc());
+          nullptr /*Mask*/, false /*Consecutive*/, VPIRMetadata(*Store),
+          Ingredient.getDebugLoc());
     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
       NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
     } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {

@@ -2185,10 +2185,25 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
   auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
     assert(OrigMask && "Unmasked recipe when folding tail");
     // HeaderMask will be handled using EVL.
+    VPValue *NewMask = OrigMask;
+    VPWidenIntrinsicRecipe *ReverseMask = nullptr;
+    bool IsReverse = match(
+        OrigMask, m_VPInstruction<VPInstruction::Reverse>(m_VPValue(NewMask)));
+    if (HeaderMask == NewMask)
+      return nullptr;
+
     VPValue *Mask;
-    if (match(OrigMask, m_LogicalAnd(m_Specific(HeaderMask), m_VPValue(Mask))))
-      return Mask;
-    return HeaderMask == OrigMask ? nullptr : OrigMask;
+    if (match(NewMask, m_LogicalAnd(m_Specific(HeaderMask), m_VPValue(Mask))))
+      NewMask = Mask;
+
+    if (IsReverse) {
+      auto *R = cast<VPInstruction>(OrigMask);
+      ReverseMask = getEVLReverse(*R, TypeInfo, EVL);
+      ReverseMask->insertBefore(R);
+      ReverseMask->setOperand(0, NewMask);
+      NewMask = ReverseMask;
+    }
+    return NewMask;
   };

 /// Adjust any end pointers so that they point to the end of EVL lanes not VF.

@@ -2201,7 +2216,9 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
   assert(
       all_of(EndPtr->users(),
              [](VPUser *U) {
-               return cast<VPWidenMemoryRecipe>(U)->isReverse();
+               auto *MaskR = dyn_cast<VPInstruction>(
+                   cast<VPWidenMemoryRecipe>(U)->getMask());
+               return MaskR && MaskR->getOpcode() == VPInstruction::Reverse;
              }) &&
       "VPVectorEndPointRecipe not used by reversed widened memory recipe?");
   VPVectorEndPointerRecipe *EVLAddr = EndPtr->clone();

@@ -2364,8 +2381,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
     if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(EVLRecipe);
         MemR && match(MemR->getAddr(),
                       m_VectorEndPointer(m_VPValue(), m_Specific(&EVL)))) {
-      assert(MemR->isReverse() &&
-             "Only reverse access uses VPVectorEndPointerRecipe as address");
       VPRecipeBase *Candidate = nullptr;
       if (auto *LoadR = dyn_cast<VPWidenLoadEVLRecipe>(MemR)) {
         assert(LoadR->getNumUsers() == 1 &&

@@ -3764,8 +3779,8 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
     // process one original iteration.
     auto *L = new VPWidenLoadRecipe(
         *cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
-        LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
-        /*Reverse=*/false, {}, LoadGroup->getDebugLoc());
+        LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true, {},
+        LoadGroup->getDebugLoc());
     L->insertBefore(LoadGroup);
     return L;
   }

@@ -3807,8 +3822,8 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,

     auto *S = new VPWidenStoreRecipe(
         *cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos()),
-        StoreGroup->getAddr(), Res, nullptr, /*Consecutive=*/true,
-        /*Reverse=*/false, {}, StoreGroup->getDebugLoc());
+        StoreGroup->getAddr(), Res, nullptr, /*Consecutive=*/true, {},
+        StoreGroup->getDebugLoc());
     S->insertBefore(StoreGroup);
     StoreGroup->eraseFromParent();
   }
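
The EVL transform now has to look through the explicit reverse. GetNewMask peels a VPInstruction::Reverse off the incoming mask, strips the header mask from what remains, and, if a reverse was peeled, rebuilds it as an EVL-aware reverse via getEVLReverse (a VPWidenIntrinsicRecipe, presumably wrapping the vp.reverse intrinsic) inserted before the original one. Likewise, the check that only reverse accesses use VPVectorEndPointerRecipe as an address is rephrased from isReverse() to "the recipe's mask is defined by a VPInstruction::Reverse".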

llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll

Lines changed: 1 addition & 1 deletion
@@ -22,8 +22,8 @@ define void @vector_reverse_mask_nxv4i1(ptr %a, ptr %cond, i64 %N) #0 {
 ; CHECK: %[[WIDEMSKLOAD:.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE6]], <vscale x 4 x double> poison)
 ; CHECK: %[[REVERSE7:.*]] = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> %[[WIDEMSKLOAD]])
 ; CHECK: %[[FADD:.*]] = fadd <vscale x 4 x double> %[[REVERSE7]]
-; CHECK: %[[REVERSE8:.*]] = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> %[[FADD]])
 ; CHECK: %[[REVERSE9:.*]] = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
+; CHECK: %[[REVERSE8:.*]] = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> %[[FADD]])
 ; CHECK: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> %[[REVERSE8]], ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE9]]

 entry:

llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll

Lines changed: 1 addition & 1 deletion
@@ -46,8 +46,8 @@ define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N)
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 -24
 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 -56
 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP5]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
 ; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
 ; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
 ; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], splat (double 1.000000e+00)
 ; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], splat (double 1.000000e+00)
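
Both test updates are pure reorderings of CHECK lines: the mask reverses are now created when the recipe is built, so they are materialized ahead of the reversed data (and, in the fixed-width test, ahead of the masked load) rather than interleaved with it.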
