diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 394b741f1c1d0..c5c3ef02115ec 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15824,48 +15824,49 @@ bool AArch64TargetLowering::shouldSinkOperands( return false; } -static bool createTblShuffleForZExt(ZExtInst *ZExt, FixedVectorType *DstTy, - bool IsLittleEndian) { - Value *Op = ZExt->getOperand(0); - auto *SrcTy = cast(Op->getType()); - auto SrcWidth = cast(SrcTy->getElementType())->getBitWidth(); - auto DstWidth = cast(DstTy->getElementType())->getBitWidth(); +static bool createTblShuffleMask(unsigned SrcWidth, unsigned DstWidth, + unsigned NumElts, bool IsLittleEndian, + SmallVectorImpl &Mask) { if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth >= 64) return false; assert(DstWidth % SrcWidth == 0 && - "TBL lowering is not supported for a ZExt instruction with this " - "source & destination element type."); - unsigned ZExtFactor = DstWidth / SrcWidth; + "TBL lowering is not supported for a conversion instruction with this " + "source and destination element type."); + + unsigned Factor = DstWidth / SrcWidth; + unsigned MaskLen = NumElts * Factor; + + Mask.clear(); + Mask.resize(MaskLen, NumElts); + + unsigned SrcIndex = 0; + for (unsigned I = IsLittleEndian ? 0 : Factor - 1; I < MaskLen; I += Factor) + Mask[I] = SrcIndex++; + + return true; +} + +static Value *createTblShuffleForZExt(IRBuilderBase &Builder, Value *Op, + FixedVectorType *ZExtTy, + FixedVectorType *DstTy, + bool IsLittleEndian) { + auto *SrcTy = cast(Op->getType()); unsigned NumElts = SrcTy->getNumElements(); - IRBuilder<> Builder(ZExt); + auto SrcWidth = cast(SrcTy->getElementType())->getBitWidth(); + auto DstWidth = cast(DstTy->getElementType())->getBitWidth(); + SmallVector Mask; - // Create a mask that selects <0,...,Op[i]> for each lane of the destination - // vector to replace the original ZExt. This can later be lowered to a set of - // tbl instructions. - for (unsigned i = 0; i < NumElts * ZExtFactor; i++) { - if (IsLittleEndian) { - if (i % ZExtFactor == 0) - Mask.push_back(i / ZExtFactor); - else - Mask.push_back(NumElts); - } else { - if ((i + 1) % ZExtFactor == 0) - Mask.push_back((i - ZExtFactor + 1) / ZExtFactor); - else - Mask.push_back(NumElts); - } - } + if (!createTblShuffleMask(SrcWidth, DstWidth, NumElts, IsLittleEndian, Mask)) + return nullptr; auto *FirstEltZero = Builder.CreateInsertElement( PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0)); Value *Result = Builder.CreateShuffleVector(Op, FirstEltZero, Mask); Result = Builder.CreateBitCast(Result, DstTy); - if (DstTy != ZExt->getType()) - Result = Builder.CreateZExt(Result, ZExt->getType()); - ZExt->replaceAllUsesWith(Result); - ZExt->eraseFromParent(); - return true; + if (DstTy != ZExtTy) + Result = Builder.CreateZExt(Result, ZExtTy); + return Result; } static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian) { @@ -16030,21 +16031,30 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion( DstTy = TruncDstType; } - - return createTblShuffleForZExt(ZExt, DstTy, Subtarget->isLittleEndian()); + IRBuilder<> Builder(ZExt); + Value *Result = createTblShuffleForZExt( + Builder, ZExt->getOperand(0), cast(ZExt->getType()), + DstTy, Subtarget->isLittleEndian()); + if (!Result) + return false; + ZExt->replaceAllUsesWith(Result); + ZExt->eraseFromParent(); + return true; } auto *UIToFP = dyn_cast(I); if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) && DstTy->getElementType()->isFloatTy()) { IRBuilder<> Builder(I); - auto *ZExt = cast( - Builder.CreateZExt(I->getOperand(0), VectorType::getInteger(DstTy))); + Value *ZExt = createTblShuffleForZExt( + Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy), + FixedVectorType::getInteger(DstTy), Subtarget->isLittleEndian()); + if (!ZExt) + return false; auto *UI = Builder.CreateUIToFP(ZExt, DstTy); I->replaceAllUsesWith(UI); I->eraseFromParent(); - return createTblShuffleForZExt(ZExt, cast(ZExt->getType()), - Subtarget->isLittleEndian()); + return true; } // Convert 'fptoui <(8|16) x float> to <(8|16) x i8>' to a wide fptoui