@@ -15691,48 +15691,51 @@ bool AArch64TargetLowering::shouldSinkOperands(
15691
15691
return false;
15692
15692
}
15693
15693
15694
- static bool createTblShuffleForZExt(ZExtInst *ZExt, FixedVectorType *DstTy,
15695
- bool IsLittleEndian) {
15696
- Value *Op = ZExt->getOperand(0);
15697
- auto *SrcTy = cast<FixedVectorType>(Op->getType());
15698
- auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
15699
- auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
15694
+ static bool createTblShuffleMask(unsigned SrcWidth, unsigned DstWidth,
15695
+ unsigned NumElts, bool IsLittleEndian,
15696
+ SmallVectorImpl<int> &Mask) {
15700
15697
if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth >= 64)
15701
15698
return false;
15702
15699
15703
- assert(DstWidth % SrcWidth == 0 &&
15704
- "TBL lowering is not supported for a ZExt instruction with this "
15705
- "source & destination element type.");
15706
- unsigned ZExtFactor = DstWidth / SrcWidth;
15700
+ if (DstWidth % SrcWidth != 0)
15701
+ return false;
15702
+
15703
+ unsigned Factor = DstWidth / SrcWidth;
15704
+ unsigned MaskLen = NumElts * Factor;
15705
+
15706
+ Mask.clear();
15707
+ Mask.resize(MaskLen, NumElts);
15708
+
15709
+ unsigned SrcIndex = 0;
15710
+ for (unsigned I = 0; I < MaskLen; I += Factor)
15711
+ Mask[I] = SrcIndex++;
15712
+
15713
+ if (!IsLittleEndian)
15714
+ std::rotate(Mask.rbegin(), Mask.rbegin() + Factor - 1, Mask.rend());
15715
+
15716
+ return true;
15717
+ }
15718
+
15719
+ static Value *createTblShuffleForZExt(IRBuilderBase &Builder, Value *Op,
15720
+ FixedVectorType *ZExtTy,
15721
+ FixedVectorType *DstTy,
15722
+ bool IsLittleEndian) {
15723
+ auto *SrcTy = cast<FixedVectorType>(Op->getType());
15707
15724
unsigned NumElts = SrcTy->getNumElements();
15708
- IRBuilder<> Builder(ZExt);
15725
+ auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
15726
+ auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
15727
+
15709
15728
SmallVector<int> Mask;
15710
- // Create a mask that selects <0,...,Op[i]> for each lane of the destination
15711
- // vector to replace the original ZExt. This can later be lowered to a set of
15712
- // tbl instructions.
15713
- for (unsigned i = 0; i < NumElts * ZExtFactor; i++) {
15714
- if (IsLittleEndian) {
15715
- if (i % ZExtFactor == 0)
15716
- Mask.push_back(i / ZExtFactor);
15717
- else
15718
- Mask.push_back(NumElts);
15719
- } else {
15720
- if ((i + 1) % ZExtFactor == 0)
15721
- Mask.push_back((i - ZExtFactor + 1) / ZExtFactor);
15722
- else
15723
- Mask.push_back(NumElts);
15724
- }
15725
- }
15729
+ if (!createTblShuffleMask(SrcWidth, DstWidth, NumElts, IsLittleEndian, Mask))
15730
+ return nullptr;
15726
15731
15727
15732
auto *FirstEltZero = Builder.CreateInsertElement(
15728
15733
PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0));
15729
15734
Value *Result = Builder.CreateShuffleVector(Op, FirstEltZero, Mask);
15730
15735
Result = Builder.CreateBitCast(Result, DstTy);
15731
- if (DstTy != ZExt->getType())
15732
- Result = Builder.CreateZExt(Result, ZExt->getType());
15733
- ZExt->replaceAllUsesWith(Result);
15734
- ZExt->eraseFromParent();
15735
- return true;
15736
+ if (DstTy != ZExtTy)
15737
+ Result = Builder.CreateZExt(Result, ZExtTy);
15738
+ return Result;
15736
15739
}
15737
15740
15738
15741
static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian) {
@@ -15897,21 +15900,30 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
15897
15900
15898
15901
DstTy = TruncDstType;
15899
15902
}
15900
-
15901
- return createTblShuffleForZExt(ZExt, DstTy, Subtarget->isLittleEndian());
15903
+ IRBuilder<> Builder(ZExt);
15904
+ Value *Result = createTblShuffleForZExt(
15905
+ Builder, ZExt->getOperand(0), cast<FixedVectorType>(ZExt->getType()),
15906
+ DstTy, Subtarget->isLittleEndian());
15907
+ if (!Result)
15908
+ return false;
15909
+ ZExt->replaceAllUsesWith(Result);
15910
+ ZExt->eraseFromParent();
15911
+ return true;
15902
15912
}
15903
15913
15904
15914
auto *UIToFP = dyn_cast<UIToFPInst>(I);
15905
15915
if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
15906
15916
DstTy->getElementType()->isFloatTy()) {
15907
15917
IRBuilder<> Builder(I);
15908
- auto *ZExt = cast<ZExtInst>(
15909
- Builder.CreateZExt(I->getOperand(0), VectorType::getInteger(DstTy)));
15918
+ Value *ZExt = createTblShuffleForZExt(
15919
+ Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy),
15920
+ FixedVectorType::getInteger(DstTy), Subtarget->isLittleEndian());
15921
+ if (!ZExt)
15922
+ return false;
15910
15923
auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
15911
15924
I->replaceAllUsesWith(UI);
15912
15925
I->eraseFromParent();
15913
- return createTblShuffleForZExt(ZExt, cast<FixedVectorType>(ZExt->getType()),
15914
- Subtarget->isLittleEndian());
15926
+ return true;
15915
15927
}
15916
15928
15917
15929
// Convert 'fptoui <(8|16) x float> to <(8|16) x i8>' to a wide fptoui
0 commit comments